texlive[60689] trunk: crossrefware (3oct21)

commits+karl at tug.org commits+karl at tug.org
Sun Oct 3 22:32:45 CEST 2021


Revision: 60689
          http://tug.org/svn/texlive?view=revision&revision=60689
Author:   karl
Date:     2021-10-03 22:32:45 +0200 (Sun, 03 Oct 2021)
Log Message:
-----------
crossrefware (3oct21)

Modified Paths:
--------------
    trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bbl2bib.pl
    trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibdoiadd.pl
    trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibmradd.pl
    trunk/Build/source/texk/texlive/linked_scripts/crossrefware/biburl2doi.pl
    trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibzbladd.pl
    trunk/Build/source/texk/texlive/linked_scripts/crossrefware/ltx2crossrefxml.pl
    trunk/Master/texmf-dist/doc/man/man1/bbl2bib.1
    trunk/Master/texmf-dist/doc/man/man1/bbl2bib.man1.pdf
    trunk/Master/texmf-dist/doc/man/man1/bibdoiadd.1
    trunk/Master/texmf-dist/doc/man/man1/bibdoiadd.man1.pdf
    trunk/Master/texmf-dist/doc/man/man1/bibmradd.1
    trunk/Master/texmf-dist/doc/man/man1/bibmradd.man1.pdf
    trunk/Master/texmf-dist/doc/man/man1/biburl2doi.1
    trunk/Master/texmf-dist/doc/man/man1/biburl2doi.man1.pdf
    trunk/Master/texmf-dist/doc/man/man1/bibzbladd.1
    trunk/Master/texmf-dist/doc/man/man1/bibzbladd.man1.pdf
    trunk/Master/texmf-dist/doc/man/man1/ltx2crossrefxml.1
    trunk/Master/texmf-dist/doc/man/man1/ltx2crossrefxml.man1.pdf
    trunk/Master/texmf-dist/doc/support/crossrefware/Makefile
    trunk/Master/texmf-dist/doc/support/crossrefware/README
    trunk/Master/texmf-dist/doc/support/crossrefware/crossrefware.pdf
    trunk/Master/texmf-dist/doc/support/crossrefware/head.ltx
    trunk/Master/texmf-dist/scripts/crossrefware/bbl2bib.pl
    trunk/Master/texmf-dist/scripts/crossrefware/bibdoiadd.pl
    trunk/Master/texmf-dist/scripts/crossrefware/bibmradd.pl
    trunk/Master/texmf-dist/scripts/crossrefware/biburl2doi.pl
    trunk/Master/texmf-dist/scripts/crossrefware/bibzbladd.pl
    trunk/Master/texmf-dist/scripts/crossrefware/ltx2crossrefxml.pl
    trunk/Master/texmf-dist/tex/latex/crossrefware/ltx2crossrefxml.cfg
    trunk/Master/tlpkg/libexec/ctan2tds

Modified: trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bbl2bib.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bbl2bib.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bbl2bib.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -86,7 +86,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -98,7 +98,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibdoiadd.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibdoiadd.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibdoiadd.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -85,7 +85,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -97,7 +97,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibmradd.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibmradd.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibmradd.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -52,7 +52,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -64,7 +64,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Build/source/texk/texlive/linked_scripts/crossrefware/biburl2doi.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/crossrefware/biburl2doi.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Build/source/texk/texlive/linked_scripts/crossrefware/biburl2doi.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -39,7 +39,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2017  Boris Veytsman
+Copyright (C) 2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -51,7 +51,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibzbladd.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibzbladd.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Build/source/texk/texlive/linked_scripts/crossrefware/bibzbladd.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -52,7 +52,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -64,7 +64,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Build/source/texk/texlive/linked_scripts/crossrefware/ltx2crossrefxml.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/crossrefware/ltx2crossrefxml.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Build/source/texk/texlive/linked_scripts/crossrefware/ltx2crossrefxml.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -4,13 +4,13 @@
 
 =head1 NAME
 
-ltx2crossrefxml.pl - a tool for creation of XML files for submitting to crossref.
+ltx2crossrefxml.pl - create XML files for submitting to crossref.org
 
 =head1 SYNOPSIS
 
-ltx2crossrefxml [B<-c> I<config_file>]  [B<-o> I<output>] I<latex_file> I<latex_file> ...
+ltx2crossrefxml [B<-c> I<config_file>]  [B<-o> I<output_file>] [B<-rpi-is-xml>]
+                I<latex_file1> I<latex_file2> ...
 
-
 =head1 OPTIONS
 
 =over 4
@@ -17,89 +17,326 @@
 
 =item B<-c> I<config_file>
 
-Configuration file.  If this file is absent, some defaults are used.
+Configuration file.  If this file is absent, defaults are used.
 See below for its format.
 
+=item B<-o> I<output_file>
 
-=item B<-o> I<output>
-
 Output file.  If this option is not used, the XML is output to stdout.
 
+=item B<-rpi-is-xml>
+
+Do not transform author and title input strings, assume they are valid XML.
+
 =back
 
+The usual C<--help> and C<--version> options are also supported. Options
+can begin with either C<-> or C<-->, and can be ordered arbitrarily.
+
 =head1 DESCRIPTION
 
-The script takes a number of latex files and produces an XML file
-ready for submission to Crossref.  Each file must be previously processed
-by LaTeX with the newest C<resphilosophica> package: the package creates
-the file C<.rti> wtih the information about the bibliography.
+For each given I<latex_file>, this script reads C<.rpi> and (if they
+exist) C<.bbl> files and outputs corresponding XML that can be uploaded
+to Crossref (L<https://crossref.org>). Any extension of I<latex_file> is
+ignored, and I<latex_file> itself is not read (and need not even exist).
 
-The processing of reference list is at present rather limited: only so
-called unstructured references are produced.
+Each C<.rpi> file specifies the metadata for a single article to be
+uploaded to Crossref (a C<journal_article> element in their schema); an
+example is below. These files are output by the C<resphilosophica>
+package (L<https://ctan.org/pkg/resphilosophica>), but (as always) can
+also be created by hand or by whatever other method you implement.
 
+Any C<.bbl> files present are used for the citation information in the
+output XML. See the L</CITATIONS> section below.
+
+Unless C<--rpi-is-xml> is specified, for all text (authors, title,
+citations), standard TeX control sequences are replaced with plain text
+or UTF-8 or eliminated, as appropriate. The C<LaTeX::ToUnicode::convert>
+routine is used for this (L<https://ctan.org/pkg/bibtexperllibs>).
+Tricky TeX control sequences will almost surely not be handled
+correctly. If C<--rpi-is-xml> is given, the author and title strings
+from the rpi files are output as-is, assuming they are valid XML; no
+checking is done. Citation text from C<.bbl> files is always converted
+from LaTeX to plain text.
+
+This script just writes an XML file. It's up to you to actually do the
+uploading to Crossref; for example, you can use their Java tool 
+C<crossref-upload-tool.jar>
+(L<https://www.crossref.org/education/member-setup/direct-deposit-xml/https-post>).
+For the definition of their schema, see
+L<https://data.crossref.org/reports/help/schema_doc/4.4.2/index.html>
+(this is the schema version currently followed by this script).
+
 =head1 CONFIGURATION FILE FORMAT
 
-The configuration file is mostly self-explanatory: it has comments
-(starting with C<#>) and assginments in the form
+The configuration file is read as Perl code. Thus, comment lines
+starting with C<#> and blank lines are ignored. The other lines are
+typically assignments in the form (spaces are optional):
 
-   $field = value ;
+    $variable = value ;
 
+Usually the value is a C<"string"> enclosed in ASCII double-quote or
+single-quote characters, per Perl syntax. The idea is to specify the
+user-specific and journal-specific values needed for the Crossref
+upload. The variables which are used are these:
+
+    $depositorName = "Depositor Name";
+    $depositorEmail = 'depositor@example.org';
+    $registrant = 'Registrant';  # organization name
+    $fullTitle = "FULL TITLE";   # journal name
+    $issn = "1234-5678";         # required
+    $abbrevTitle = "ABBR. TTL."; # optional
+    $coden = "CODEN";            # optional
+
+
+For a given run, all C<.rpi> data read is assumed to belong to the
+journal that is specified in the configuration file. More precisely, the
+configuration data is written as a C<journal_metadata> element, with
+given C<full_title>, C<issn>, etc., and then each C<.rpi> is written as
+C<journal_issue> plus C<journal_article> elements.
+
+The configuration file can also define one Perl function:
+C<LaTeX_ToUnicode_convert_hook>. If it is defined, it is called at the
+beginning of the procedure that converts LaTeX text to Unicode, which is
+done with the L<LaTeX::ToUnicode> module, from the C<bibtexperllibs>
+package (L<https://ctan.org/pkg/bibtexperllibs>). The function must
+accept one string (the LaTeX text), and return one string (presumably
+the transformed string). The standard conversions are then applied to
+the returned string, so the configured function need only handle special
+cases, such as control sequences particular to the journal at hand.
+
+=head1 RPI FILE FORMAT
+
+Here's the (relevant part of the) C<.rpi> file corresponding to the
+C<rpsample.tex> example in the C<resphilosophica> package
+(L<https://ctan.org/pkg/resphilosophica>):
+
+  %authors=Boris Veytsman\and A. U. Th{\o }r\and C. O. R\"espondent
+  %title=A Sample Paper:\\ \emph  {A Template}
+  %year=2012
+  %volume=90
+  %issue=1--2
+  %startpage=1
+  %endpage=1
+  %doi=10.11612/resphil.A31245
+  %paperUrl=http://borisv.lk.net/paper12
+  %publicationType=full_text
+
+Other lines, some not beginning with %, are ignored (and not shown).
+For more details on processing, see the code.
+
+The C<%paperUrl> value is what will be associated with the given C<%doi>
+(output as the C<resource> element). Crossref strongly recommends that
+the url be for a so-called landing page, and not directly for a pdf
+(L<https://www.crossref.org/education/member-setup/creating-a-landing-page/>).
+Special case: if the url is not specified, 
+and the journal is I<S<Res Philosophica>>,
+a special-purpose search url using L<pdcnet.org> is returned.
+Any other journal must always specify this.
+
+The C<%authors> field is split at C<\and> (ignoring whitespace before
+and after), and output as the C<contributors> element, using
+C<sequence="first"> for the first listed, C<sequence="additional"> for
+the remainder.
+
+If the C<%publicationType> is not specified, it defaults to
+C<full_text>, since that has historically been the case; C<full_text>
+can also be given explicitly. The other values allowed by the Crossref
+schema are C<abstract_only> and C<bibliographic_record>. Finally, if the
+value is C<omit>, the C<publication_type> attribute is omitted entirely
+from the given C<journal_article> element.
+
+Each C<.rpi> must contain information for only one article, but multiple
+files can be read in a single run. It would not be difficult to support
+multiple articles in a single C<.rpi> file, but it makes debugging and
+error correction easier when each uploaded XML contains a single
+article.
+
+=head2 MORE ABOUT AUTHOR NAMES
+
+The three formats for names recognized are (not coincidentally) the same
+as BibTeX:
+
+   First von Last
+   von Last, First
+   von Last, Jr., First
+   
+The forms can be freely intermixed within a single C<%authors> line,
+separated with C<\and> (including the backslash). Commas as name
+separators are not supported, unlike BibTeX.
+
+In short, you may almost always use the first form; you shouldn't if
+either there's a Jr part, or the Last part has multiple tokens but
+there's no von part. See the C<btxdoc> (``BibTeXing'' by Oren Patashnik)
+document for details.
+
+In the C<%authors> line of a C<.rpi> file, some secondary directives are
+recognized, indicated by C<|> characters. Easiest to explain with an
+example:
+
+  %authors=|organization|\LaTeX\ Project Team \and Alex Brown|orcid=123
+
+Thus: 1) if C<|organization|> is specified, the author name will be output
+as an C<organization> contributor, instead of the usual C<person_name>,
+as the Crossref schema requires.
+
+2) If C<|orcid=I<value>|> is specified, the I<value> is output as an
+C<ORCID> element for that C<person_name>.
+
+These two directives, C<|organization|> and C<|orcid|>, are mutually
+exclusive, because that's how the Crossref schema defines them. The C<=>
+sign after C<orcid> is required, while all spaces after the C<orcid>
+keyword are ignored. Other than that, the ORCID value is output
+literally. (E.g., the ORCID value of C<123> above is clearly invalid,
+but it would be output anyway, with no warning.)
+
+Extra C<|> characters, at the beginning or end of the entire C<%authors>
+string, or doubled in the middle, are accepted and ignored. Whitespace
+is ignored around all C<|> characters.
+
+=head1 CITATIONS
+
+Each C<.bbl> file corresponding to an input C<.rpi> file is read and
+used to output a C<citation_list> element for that C<journal_article> in
+the output XML. If no C<.bbl> file exists for a given C<.rpi>,
+no C<citation_list> is output for that article.
+
+The C<.bbl> processing is rudimentary: only so-called
+C<unstructured_citation> references are produced for Crossref, that is,
+the contents of the citation (each paragraph in the C<.bbl>) is dumped
+as a single flat string without markup.
+
+Bibliography text is unconditionally converted from TeX to XML, via the
+method described above. It is not unusual for the conversion to be
+incomplete or incorrect.  It is up to you to check for this; e.g., if
+any backslashes remain in the output, it is most likely an error.
+
+Furthermore, it is assumed that the C<.bbl> file contains a sequence of
+references, each starting with C<\bibitem{I<KEY>}> (which itself must be
+at the beginning of a line, preceded only by whitespace), and the whole
+bibliography ending with C<\end{thebibliography}> (similarly at the
+beginning of a line). A bibliography not following this format will not
+produce useful results. Bibliographies can be created by hand, or with
+BibTeX, or any other method.
+
+The C<key> attribute for the C<citation> element is taken as the I<KEY>
+argument to the C<\bibitem> command. The sequential number of the
+citation (1, 2, ...) is appended. The argument to C<\bibitem> can be
+empty (C<\bibitem{}>), and the sequence number will be used on its own.
+Although TeX will not handle empty C<\bibitem> keys, it can be
+convenient when creating a C<.bbl> purely for Crossref.
+
+The C<.rpi> file is also checked for the bibliography information, in
+this same format.
+
+Feature request: if anyone is interested in figuring out how to generate
+structured citations
+(L<https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#citation>)
+instead of these flat text dumps, that would be great.
+
 =head1 EXAMPLES
 
-  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex -o result.xml
+  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex \
+                      -o result.xml
 
-  ltx2crossrefxml.pl -c myconfig.cnf paper.tex -o paper.xml
+  ltx2crossrefxml.pl -c myconfig.cfg paper.tex -o paper.xml
 
 =head1 AUTHOR
 
-Boris Veytsman
+Boris Veytsman L<https://github.com/borisveytsman/crossrefware>
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2012-2016  Boris Veytsman
+Copyright (C) 2012-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
-L<http://www.gnu.org/licenses/gpl.html>.  There is NO WARRANTY, to the
+L<https://www.gnu.org/licenses/gpl.html>.  There is NO WARRANTY, to the
 extent permitted by law.
 
-
 =cut
 
  use strict;
+ use warnings;
 
+ use Cwd;
+ use File::Basename;
+ use File::Spec;
+
  BEGIN {
      # find files relative to our installed location within TeX Live
-     chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+     chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
      if (length($TLMaster)) {
 	 unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
      }
+     # find development bibtexperllibs in sibling checkout to this script,
+     # even if $0 is a symlink. All irrelevant when using from an installation.
+     my $real0 = Cwd::abs_path($0);
+     my $scriptdir = File::Basename::dirname($real0);
+     my $dev_btxperllibs = Cwd::abs_path("$scriptdir/../bibtexperllibs");
+     # we need the lib/ subdirectories inside ...
+     unshift (@INC, glob ("$dev_btxperllibs/*/lib")) if -d $dev_btxperllibs;
  }
+
  use POSIX qw(strftime);
+
  use BibTeX::Parser::Author;
- use LaTeX::ToUnicode qw (convert);
- use File::Basename;
- use File::Spec;
- my $USAGE="USAGE: $0 [-c config] [-o output] file1 file2 ...\n";
-my $VERSION = <<END;
-ltx2crossrefxml v2.2
-This is free software.  You may redistribute copies of it under the
-terms of the GNU General Public License
-http://www.gnu.org/licenses/gpl.html.  There is NO WARRANTY, to the
-extent permitted by law.
-$USAGE
+ use LaTeX::ToUnicode;
+
+ my $USAGE = <<END;
+Usage: $0 [-c CONFIG] [-o OUTPUT] [--rpi-is-xml] LTXFILE...
+
+Convert .rpi and (if any are present) .bbl files corresponding to each
+LTXFILE to xml, for submitting to crossref.org. The LTXFILE is not read
+(and need not even exist); any extension it has is replaced by .rpi and
+.bbl.
+
+The .rpi files are plain text, with values on lines beginning with %, as
+output by (for example) the resphilosophica LaTeX package. The .bbl
+files are as output by BibTeX. Both are also commonly created by hand.
+The documentation for this script has examples.
+
+The xml is written to standard output by default; the -o (--output)
+option overrides this.
+
+If the -c (--config) option is given, the given file is read before any
+processing is done. This is used to define journal-specific defaults.
+
+The usual --help and --version options are also supported.
+
+For an example of using this script and associated code, see the TUGboat
+processing at
+https://github.com/TeXUsersGroup/tugboat/tree/trunk/capsules/crossref.
+
+Development sources, bug tracker: https://github.com/borisveytsman/crossrefware
+Releases: https://ctan.org/pkg/crossrefware
 END
- use Getopt::Std;
+
+ my $VERSION = <<END;
+ltx2crossrefxml (crossrefware) 2.51
+This is free software: you are free to change and redistribute it, under
+the terms of the GNU General Public License
+http://www.gnu.org/licenses/gpl.html (any version).
+There is NO WARRANTY, to the extent permitted by law.
+
+Written by Boris Veytsman.
+END
+ use Getopt::Long;
  my %opts;
- getopts('c:o:hV',\%opts) or die $USAGE;
+
+ GetOptions(
+   "config|c=s" => \($opts{c}),
+   "output|o=s" => \($opts{o}),
+   "rpi-is-xml!"=> \($opts{xi}),
+   "version|V"  => \($opts{V}),
+   "help|?"     => \($opts{h})) || pod2usage(1);
+
+ if ($opts{h}) { print "$USAGE\n$VERSION"; exit 0; } 
+ if ($opts{V}) { print $VERSION; exit 0; } 
+
  use utf8;
  binmode(STDOUT, ":utf8");
 
-if ($opts{h} || $opts{V}){
-    print $VERSION;
-    exit 0;
-}
-
  ################################################################
  # Defaults and parameters
  ################################################################
@@ -107,32 +344,36 @@
  *OUT=*STDOUT;
  
  if (defined($opts{o})) {
-     open (OUT, ">$opts{o}") or die "Cannot open file $opts{o} for writing\n";
+     open (OUT, ">$opts{o}") or die "open($opts{o}) for writing failed: $!\n";
      binmode(OUT, ":utf8")
  }
 
 
- our $depositorName='DEPOSITOR_NAME';
- our $depositorEmail='DEPOSITOR_EMAIL';
- our $registrant='REGISTRANT';
+ our $depositorName = 'DEPOSITOR_NAME';
+ our $depositorEmail = 'DEPOSITOR_EMAIL';
+ our $registrant = 'REGISTRANT';
  our $fullTitle = "FULL TITLE";
- our $abbrevTitle = "ABBR. Title.";
- our $issn = "1234-5678";
+ our $abbrevTitle = "ABBR. TTL.";
+ our $issn = "0000-0000";
  our $coden = "CODEN";
- our $batchId="ltx2crossref$$";
- our $timestamp=strftime("%Y%m%d%H%M%S", gmtime);
+ our $timestamp = strftime("%Y%m%d%H%M%S", gmtime);
+ # use timestamp in batchid, since the value is supposed to be unique
+ # for every submission to crossref by a given publisher.
+ # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#doi_batch_id
+ our $batchId="ltx2crossref-$timestamp-$$";
 
 
  if ($opts{c}) {
      if (-r $opts{c}) {
-	 require $opts{c};
+         # if config arg is absolute, fine; if not, prepend "./" as slightly
+         # less troublesome than putting "." in the @INC path.
+         my $rel = (File::Spec->file_name_is_absolute($opts{c}) ? "" : "./");
+	 require "$rel$opts{c}";
      } else {
-	 die "Cannot read options $opts{c}.  $USAGE";
+	 die "Cannot read config file $opts{c}. Goodbye.";
      }
  }
 
-
-
  PrintHead();
 
  # 
@@ -154,11 +395,9 @@
 	     }
 	 }
      }
-
  }
 
  PrintTail();
-
  exit(0);
 
 
@@ -165,106 +404,150 @@
 #####################################################
 #  Printing the head and the tail
 #####################################################
-
 sub PrintHead {
+    # do not output the <coden> or <abbrev_title> if the journal doesn't
+    # have them.
+    my $indent = "        ";
+    my $coden_out = $coden ne "CODEN" ? "\n$indent<coden>$coden</coden>" : "";
+    my $abbrev_title_out = $abbrevTitle ne "ABBR. TTL."
+        ? "\n$indent<abbrev_title>$abbrevTitle</abbrev_title>"
+        : "";
 
-
+    # as of schema version 4.3.4, crossref renamed the <name> element
+    # inside <depositor> to <depositor_name>. Sigh. Something to take
+    # into account with older schemas.
+    # https://www.crossref.org/education/content-registration/crossrefs-metadata-deposit-schema/schema-versions/
     print OUT <<END;
-<doi_batch xmlns="http://www.crossref.org/schema/4.3.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="4.3.0" xsi:schemaLocation="http://www.crossref.org/schema/4.3.0 http://www.crossref.org/schema/deposit/crossref4.3.0.xsd">
+<doi_batch xmlns="http://www.crossref.org/schema/4.4.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="4.4.2" xsi:schemaLocation="http://www.crossref.org/schema/4.4.2 http://www.crossref.org/schema/deposit/crossref4.4.2.xsd">
   <head>
     <doi_batch_id>$batchId</doi_batch_id>
     <timestamp>$timestamp</timestamp>
     <depositor>
-      <name>$depositorName</name>
+      <depositor_name>$depositorName</depositor_name>
       <email_address>$depositorEmail</email_address>
     </depositor>
     <registrant>$registrant</registrant>
   </head>
-  <body>
-    <journal>
-      <journal_metadata language="en">
-        <full_title>$fullTitle</full_title>
-        <abbrev_title>$abbrevTitle</abbrev_title>
-	<issn>$issn</issn>
-	<coden>$coden</coden>
-      </journal_metadata>
+  <body><journal>
+    <journal_metadata language="en">
+      <full_title>$fullTitle</full_title>$abbrev_title_out
+      <issn>$issn</issn>$coden_out	
+    </journal_metadata>
 END
-
 }
 
 sub PrintTail {
     print OUT <<END;
-    </journal>
-  </body>
+  </journal></body>
 </doi_batch>
 END
 
-return;
+    return;
 }
 
 
 #######################################################
-#  Adding one paper
+#  Adding one paper from $file.rpi and .bbl to global %papers.
 #######################################################
-
 sub AddPaper {
     my $file = shift;
     my ($name,$path,$suffix) = fileparse($file, '\.[^\.]*$');
     my $rpifile = File::Spec->catfile($path, "$name.rpi");
-    open (RPI, $rpifile) or die 
-     "Cannot find $rpifile.  Did you process $file?\n";
+    open (RPI, $rpifile)
+      or die "open($rpifile) failed: $! (did you process $file?)\n";
     my %data;
     while (<RPI>) {
 	chomp;
         if (/^%([^=]*)\s*=\s*(.*)\s*$/) {
-           $data{$1}=$2;
+           if (exists $data{$1}) {
+             warn "$rpifile:$.: already saw data{$1}=$data{$1};"
+                  . " an .rpi file should have data for only one article,"
+                  . " but overwriting with `$2' anyway.\n";
+           }
+           $data{$1} = $2;
         }
     }
     close RPI;
+    
+    # look for bibliographies in both the .rpi and any .bbl file.
     my @bibliography;
     foreach my $bibfile ($file, File::Spec->catfile($path, "$name.bbl")) {
-         @bibliography = (@bibliography, 
-          AddBibliography($bibfile));
+         @bibliography = (@bibliography, AddBibliography($bibfile));
     }
-    $data{'bibliography'}=\@bibliography;
+    $data{'bibliography'} = \@bibliography;
+
+    # Die if the fields we use unconditionally are empty. Not all of
+    # them are required by the schema, but we can wait to generalize.
+    foreach my $field (qw(title year volume issue startpage endpage doi)) {
+        if (! $data{$field}) {
+            die ("$0: field must not be empty: $field\n  "
+                 . &debug_hash_as_string("whole hash", %data));
+        }
+    }
+
     push @{$papers{$data{year}}->{$data{volume}}->{$data{issue}}}, \%data;
 }
 
 ############################################################## 
-# Reading a list of papers and adding  it to the
-# bibliography
+# Reading a list of papers from BIBFILE and adding it to the
+# bibliography. Each item is assumed to start with
+# \bibitem{KEY} and the whole bib to end with \end{thebibliography}.
+# 
+# We return a list of hashes, each hash with a single key, the citation
+# key, and its value a flat string of the entry.
+# 
+# No conversion of the text is done here.
 ##############################################################
-
 sub AddBibliography {
     my $bibfile = shift;
     open (BIB, $bibfile) or return;
+    
     my $insidebibliography = 0;
-    my $currpaper="";
+    my $currpaper = ""; # that is, the current bib entry
+    my $bibno = 0;
     my @result;
     my $key;
     while (<BIB>) {
 	chomp;
-	if (/^\s*\\bibitem(?:\[.*\])?+\{(.+)\}/) {
+	next if /^\s*%/; # TeX comment line
+	s/[ \t]%.*//;    # remove TeX comment
+	#
+	# allow empty \bibitem key for the sake of handwritten bbls.
+	# Similarly, might be more stuff on the line when handwritten.
+	if (s/^\s*\\bibitem(?:\[.*?\])?+\s*\{(.*?)\}//) {
+	    my $newkey = $1;
 	    if ($insidebibliography) {
 		if ($currpaper) {
-		    my %paperhash;
-		    $paperhash{$key}=$currpaper;
+                    # Append the current sequence number for this citation,
+                    # since that's what Crossref recommends (sort of).
+                    # For prettiness, if the key is otherwise empty,
+                    # don't include a dash beforehand.
+		    $bibno++;
+                    $key .= ($key ? "-" : "") . $bibno;
+                    #
+                    my %paperhash;
+		    $paperhash{$key} = $currpaper;
 		    push @result, \%paperhash;
 		}
 	    }
-	    $key = $1;
-	    $currpaper="";
-	    $insidebibliography=1;
+	    # The citation key (required by schema) starts as the bibitem key.
+	    $key = $newkey;
+	    
+	    $currpaper = $_;
+	    $insidebibliography = 1;
 	    next;
 	}
 	if (/^\s*\\end\{thebibliography\}/) {
 	    if ($currpaper) {
-		    my %paperhash;
-		    $paperhash{$key}=$currpaper;
-		    push @result, \%paperhash;
+	        $bibno++;
+                $key .= ($key ? "-" : "") . $bibno;
+                #
+		my %paperhash;
+		$paperhash{$key} = $currpaper;
+		push @result, \%paperhash;
 	    }
-	    $currpaper="";
-	    $insidebibliography=0;
+	    $currpaper = "";
+	    $insidebibliography = 0;
 	    next;
 	}
 	if ($insidebibliography) {
@@ -272,6 +555,14 @@
 	}
     }
     close BIB;
+    
+    # We look in the .rpi files too, which will generally have none.
+    if (@result == 0 && $bibfile =~ /\.bbl$/) {
+        warn "$0: no \\bibitems found in: $bibfile\n";
+    } elsif ($insidebibliography) {
+        warn "$0: no \\end{thebibliography} found in: $bibfile\n";
+        warn "$0:   so the last bib entry is missing.\n";
+    }
     return @result;
 }
 
@@ -278,19 +569,16 @@
 #################################################################
 #  Printing information about one issue
 #################################################################
-
 sub PrintIssueHead {
     my ($year, $volume, $issue) = @_;
     print OUT <<END;
-      <journal_issue>
-        <publication_date media_type="print">
-          <year>$year</year>
-        </publication_date>
-        <journal_volume>
-          <volume>$volume</volume>
-        </journal_volume>
-        <issue>$issue</issue>
-      </journal_issue>
+    <journal_issue>
+      <publication_date media_type="print">
+        <year>$year</year>
+      </publication_date>
+      <journal_volume><volume>$volume</volume></journal_volume>
+      <issue>$issue</issue>
+    </journal_issue>
 END
 }
 
@@ -299,127 +587,206 @@
 ###############################################################
 sub PrintPaper {
     my $paper = shift;
-    my $title=convert($paper->{title});
-    my $url=GetURL($paper);
+    my $title = SanitizeText($paper->{title});
+    my $url = GetURL($paper);
+    my $publication_type = GetPublicationType($paper->{publicationType});
+    
+    &TitleCheck($title);
     print OUT <<END;
-      <journal_article publication_type="full_text">
-        <titles>
-           <title>
-             $title
-           </title>
-        </titles>
-        <contributors>
+    <journal_article$publication_type>
+      <titles>
+        <title>$title</title>
+      </titles>
+      <contributors>
 END
-my @authors = split /\s*\\and\s*/, $paper->{authors};
-    my $seq='first';
+    my @authors = split /\s*\\and\s*/, $paper->{authors};
+    my $seq = 'first';
     foreach my $author (@authors) {
-	print OUT <<END;
-          <person_name sequence="$seq" contributor_role="author">
-END
-$seq='additional';
-	PrintAuthor($author);
-	print OUT <<END;
-          </person_name>
-END
-
+	PrintAuthor($author, $seq);
+        $seq = 'additional';
     }
 
     print OUT <<END;
-        </contributors>
-        <publication_date media_type="print">
-           <year>$paper->{year}</year>
-        </publication_date>
-        <pages>
-           <first_page>$paper->{startpage}</first_page>
-           <last_page>$paper->{endpage}</last_page>
-        </pages>
-        <doi_data>
-          <doi>$paper->{doi}</doi>
-          <timestamp>$timestamp</timestamp>
-	  <resource>$url</resource>
-        </doi_data>
+      </contributors>
+      <publication_date media_type="print">
+        <year>$paper->{year}</year>
+      </publication_date>
+      <pages>
+        <first_page>$paper->{startpage}</first_page>
+        <last_page>$paper->{endpage}</last_page>
+      </pages>
+      <doi_data>
+        <doi>$paper->{doi}</doi>
+        <timestamp>$timestamp</timestamp>
+        <resource>$url</resource>
+      </doi_data>
 END
 
-if (scalar(@{$paper->{bibliography}})) {
+    if (scalar(@{$paper->{bibliography}})) {
     print OUT <<END;
-        <citation_list>
+      <citation_list>
 END
     foreach my $citation (@{$paper->{bibliography}}) {
 	PrintCitation($citation);
     }
     print OUT <<END;
-        </citation_list>
+      </citation_list>
 END
-}
+    }
 
     print OUT <<END;
-      </journal_article>
+    </journal_article>
 END
+}
 
 
+###############################################################
+# Crossref <title> strings can contain a few so-called "face" HTML
+# commands. Complain if they have anything else.
+# schema doc: https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#title
+#   face doc: https://www.crossref.org/education/content-registration/crossrefs-metadata-deposit-schema/face-markup/
+# mathml doc: https://www.crossref.org/education/content-registration/crossrefs-metadata-deposit-schema/including-mathml-in-deposits/
+# 
+# We don't technically validate the string, e.g., mismatched tags will
+# go unnoticed here. The real validator at Crossref will catch whatever.
+###############################################################
+sub TitleCheck {
+    my $title = shift;
+    my $orig_title = $title;
+    
+    foreach my $tag (qw(b em i ovl scp strong sub sup tt u)) {
+        $title =~ s,<\s*/?$tag\s*>,,g; # eradicate <tag> and </tag>
+    }
+
+    # <font> can (maybe?) take lots of extra attributes:
+    $title =~ s,<\s*/?font.*?>,,g;
+
+    # MathML is too complex; just wipe it all out. If there are
+    # problems, the real validator at Crossref will complain.
+    $title =~ s,<\s*mml:math.*/mml:math\s*>,,g;
+    
+    # No tags should remain.
+    if ($title =~ /</) {
+       die "$0: invalid tags remaining in: $title (original: $orig_title)\n";
+    }
 }
 
-
 ###############################################################
-#  Sanitization of a text string
+# Simplistic TeX-to-html
+# (no-op for rpi text if --rpi-is-xml was given).
 ###############################################################
 sub SanitizeText {
     my $string = shift;
-    $string = convert($string);
-    $string =~ s/\\newblock//g;
-    $string =~ s/\\bgroup//g;
-    $string =~ s/\\egroup//g;
-    $string =~ s/\\scshape//g;
-    $string =~ s/\\urlprefix//g;
-    $string =~ s/\\emph//g;
-    $string =~ s/\\textbf//g;
-    $string =~ s/\\enquote//g;
-    $string =~ s/\\url/URL: /g;
-    $string =~ s/\\doi/DOI: /g;
-    $string =~ s/\\\\/ /g;
-    $string =~ s/\$//g;
-    $string =~ s/\\checkcomma/,/g;
-    $string =~ s/~/ /g;
-    $string =~ s/[\{\}]//g;
+    return $string if $opts{xi}; # do nothing if --rpi-is-xml
+    return SanitizeTextAlways($string);
+}
+
+# Split into two functions so we can sanitize bbl but not rpi.
+sub SanitizeTextAlways {
+    my $string = shift;
+   
+    # pass user hook subroutine if defined.
+    my @hook = (defined(&{"LaTeX_ToUnicode_convert_hook"}))
+               ? ("hook" => \&LaTeX_ToUnicode_convert_hook)
+               : ();
+
+    # conversion of accented control sequences to characters, etc.
+    # Let's use &#uuuu; entities instead of literal UTF-8; Crossref
+    # recommends it, and it's easier for postprocessing.
+    $string = LaTeX::ToUnicode::convert($string, entities => 1, @hook);
+    
     return $string;
 }
 
 ################################################################
-# Printing one author
+# Printing one author in arg ORIG_AUTHOR, in sequence SEQ.
 ################################################################
 sub PrintAuthor {
-    my $author=shift;
+    my ($orig_author,$seq) = @_;
 
+    # recognize extra directives, either |organization|
+    # or |orcid=<value>|.
+    my $organization = 0;
+    my $orcid = 0;
+    my $author = "";
+    my @name_parts = split (/\|/, $orig_author);
+    foreach my $np (@name_parts) {
+        $np =~ s/^\s*(.*)\s*$/$1/s; # remove leading and trailing whitespace
+        if ($np eq "organization") {
+            $organization = 1;
+        } elsif ($np =~ /^orcid/) {
+            ($orcid = $np) =~ s/^orcid\s*=//;
+            $orcid =~ s/\s//g; # remove all whitespace from value
+            if (! $orcid) {
+                warn "$0: ignoring empty orcid specified in: $orig_author\n";
+            }
+        } elsif (! $np) {
+            # silently ignore empty part, as in ||
+        } else {
+            if ($author) {
+                die ("$0: already saw author name `$author', should not"
+                     . " have second: $np\n");
+            }
+            $author = $np;
+        }
+    }
+    
+    if ($organization && $orcid) {
+        die ("$0: orcid and organization cannot both be present in:"
+             . " $orig_author\n");
+    }
+
+    # for organizations, nothing to do but output it.
+    if ($organization) {
+        my $line = SanitizeText($author);
+        print OUT <<END;
+        <organization>$line</organization>
+END
+        return;
+    }
+    
+    # what's left is the common case of a person, not an organization.
+    print OUT <<END;
+        <person_name sequence="$seq" contributor_role="author">
+END
+
+
     my $person=new BibTeX::Parser::Author ($author);
 
     if ($person->first) {
-	my $line = $person->first;
-	$line = SanitizeText($line);
-	print OUT <<END;
-            <given_name>$line</given_name>
+        my $line = $person->first;
+        $line = SanitizeText($line);
+        print OUT <<END;
+          <given_name>$line</given_name>
 END
-
     }
 
     if ($person->last) {
-	my $line = SanitizeText($person->last);
-	if ($person->von) {
-	    $line = SanitizeText($person->von)." $line";
-	}
-	print OUT <<END;
-            <surname>$line</surname>
+        my $line = SanitizeText($person->last);
+        if ($person->von) {
+            $line = SanitizeText($person->von)." $line";
+        }
+        print OUT <<END;
+          <surname>$line</surname>
 END
-
     }
 
     if ($person->jr) {
-	my $line = SanitizeText($person->jr);
-	print OUT <<END;
-            <suffix>$line</suffix>
+        my $line = SanitizeText($person->jr);
+        print OUT <<END;
+          <suffix>$line</suffix>
 END
+    }
 
+    if ($orcid) {
+        print OUT <<END;
+          <ORCID>https://orcid.org/$orcid</ORCID>
+END
     }
 
+    print OUT <<END;
+        </person_name>
+END
 }
 
 #############################################################
@@ -427,35 +794,92 @@
 #############################################################
 sub PrintCitation {
     my $paperhash=shift;
+
     foreach my $key (keys (%{$paperhash})) {
-	my $citation=$paperhash->{$key};
-	$citation=SanitizeText($citation);
+	my $citation = $paperhash->{$key};
+	$citation = SanitizeTextAlways($citation);
 
 	print OUT <<END;
-          <citation key="$key">
-             <unstructured_citation>
-               $citation
-             </unstructured_citation>
-          </citation>
+        <citation key="$key"><unstructured_citation>
+          $citation
+        </unstructured_citation></citation>
 END
+    }
 }
 
+##############################################################
+#  Return publication_type attribute for <journal_article>, given $PUBTYPE.
+#  https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#publication_type.atts
+#  
+#  If not specified in input, return " publication_type=full_text" since
+#  it was hardwired that way before. If set to "omit", return empty
+#  string. Else return " publication_type=$PUBTYPE", if the value
+#  is valid. If not, die. (Leading space is so result can be directly used.)
+##############################################################
+sub GetPublicationType {
+    my $pubtype = shift;
+    my $ret;
+
+    if (! $pubtype) {
+        $ret = "full_text"; 
+    } elsif ($pubtype eq "omit") {
+        $ret = "";
+    } elsif ($pubtype =~ /^(abstract_only|full_text|bibliographic_record)$/) {
+        $ret = $pubtype;
+    } else {
+        die "$0: invalid publication_type: $pubtype\n";
+    }
+    
+    $ret = " publication_type=\"$ret\"" if $ret;
+    return $ret;
 }
 
 ##############################################################
-#  Calculating URL
+#  Calculating URL. Res Philosophica gets special treatment.
 ##############################################################
-
 sub GetURL {
     my $paper = shift;
 
     my $result;
     if ($paper->{paperUrl}) {
-	$result= $paper->{paperUrl}
+	$result = $paper->{paperUrl}
+
+    } elsif ($paper->{doi} =~ m,^10\.11612/resphil,) {
+	my $doi = $paper->{doi};
+	$result = 'http://www.pdcnet.org/oom/service?url_ver=Z39.88-2004&rft_val_fmt=&rft.imuse_synonym=resphilosophica&rft.DOI='.$doi.'&svc_id=info:www.pdcnet.org/collection';
+
     } else {
-	my $doi=$paper->{doi};
-	$result= 'http://www.pdcnet.org/oom/service?url_ver=Z39.88-2004&rft_val_fmt=&rft.imuse_synonym=resphilosophica&rft.DOI='.$doi.'&svc_id=info:www.pdcnet.org/collection';
+        die ("$0: paperUrl field is required\n  "
+             . &debug_hash_as_string("whole hash", $paper));
     }
-    $result =~ s/&/&/g;
+    
+    $result =~ s/&/&#x26;/g; # amp(ersand)
     return $result;
 }
+
+
+##############################################################
+#  debug_hash_as_string($LABEL, HASH)
+#
+# Return LABEL followed by HASH elements, followed by a newline, as a
+# single string. If HASH is a reference, it is followed (but no recursive
+# dereferencing).
+###############################################################
+sub debug_hash_as_string {
+  my ($label) = shift;
+  my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
+
+  my $str = "$label: {";
+  my @items = ();
+  for my $key (sort keys %hash) {
+    my $val = $hash{$key};
+    $val = ".undef" if ! defined $val;
+    $key =~ s/\n/\\n/g;
+    $val =~ s/\n/\\n/g;
+    push (@items, "$key:$val");
+  }
+  $str .= join (",", @items);
+  $str .= "}";
+
+  return "$str\n";
+}

Modified: trunk/Master/texmf-dist/doc/man/man1/bbl2bib.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/bbl2bib.1	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/man/man1/bbl2bib.1	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -46,7 +46,7 @@
 .ie \n(.g .ds Aq \(aq
 .el       .ds Aq '
 .\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" If the F register is >0, we'll generate index entries on stderr for
 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 .\" output yourself in some meaningful fashion.
@@ -56,12 +56,12 @@
 ..
 .nr rF 0
 .if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-.    if \nF \{
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
 .        de IX
 .        tm Index:\\$1\t\\n%\t"\\$2"
 ..
-.        if !\nF==2 \{
+.        if !\nF==2 \{\
 .            nr % 0
 .            nr F 2
 .        \}
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "bbl2bib 1"
-.TH bbl2bib 1 "2018-04-29" "" "CROSSREF LIBRARY"
+.TH bbl2bib 1 "2021-10-02" "" "LATEX CROSSREFWARE"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -158,7 +158,7 @@
 .Sp
 Normally \f(CW\*(C`bbl2bib\*(C'\fR recognizes \s-1URL\s0 fields of the kind
 \&\f(CW\*(C`http://dx.doi.org\*(C'\fR and their variants and converts them to \s-1DOI\s0
-fields (see also \fIbiburl2doi\fR\|(1) script).  The switch \fB\-u\fR
+fields (see also \fBbiburl2doi\fR\|(1) script).  The switch \fB\-u\fR
 suppresses this cleanup.
 .SH "DESCRIPTION"
 .IX Header "DESCRIPTION"
@@ -205,7 +205,7 @@
 Boris Veytsman
 .SH "COPYRIGHT AND LICENSE"
 .IX Header "COPYRIGHT AND LICENSE"
-Copyright (C) 2014\-2017  Boris Veytsman
+Copyright (C) 2014\-2021  Boris Veytsman
 .PP
 This is free software.  You may redistribute copies of it under the
 terms of the \s-1GNU\s0 General Public License

Modified: trunk/Master/texmf-dist/doc/man/man1/bbl2bib.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/man/man1/bibdoiadd.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/bibdoiadd.1	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/man/man1/bibdoiadd.1	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -46,7 +46,7 @@
 .ie \n(.g .ds Aq \(aq
 .el       .ds Aq '
 .\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" If the F register is >0, we'll generate index entries on stderr for
 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 .\" output yourself in some meaningful fashion.
@@ -56,12 +56,12 @@
 ..
 .nr rF 0
 .if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-.    if \nF \{
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
 .        de IX
 .        tm Index:\\$1\t\\n%\t"\\$2"
 ..
-.        if !\nF==2 \{
+.        if !\nF==2 \{\
 .            nr % 0
 .            nr F 2
 .        \}
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "bibdoiadd 1"
-.TH bibdoiadd 1 "2017-11-26" "" "CROSSREF LIBRARY"
+.TH bibdoiadd 1 "2021-10-02" "" "LATEX CROSSREFWARE"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -168,7 +168,7 @@
 .IX Header "DESCRIPTION"
 The script reads a BibTeX file.  It checks whether the entries have
 DOIs.  If not, it tries to contact http://www.crossref.org to get the
-corresponding \s-1DOI. \s0 The result is a BibTeX file with the fields
+corresponding \s-1DOI.\s0  The result is a BibTeX file with the fields
 \&\f(CW\*(C`doi=...\*(C'\fR added.
 .PP
 The name of the output file is either set by the \fB\-o\fR option or 
@@ -206,7 +206,7 @@
 Boris Veytsman
 .SH "COPYRIGHT AND LICENSE"
 .IX Header "COPYRIGHT AND LICENSE"
-Copyright (C) 2014\-2017  Boris Veytsman
+Copyright (C) 2014\-2021  Boris Veytsman
 .PP
 This is free software.  You may redistribute copies of it under the
 terms of the \s-1GNU\s0 General Public License

Modified: trunk/Master/texmf-dist/doc/man/man1/bibdoiadd.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/man/man1/bibmradd.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/bibmradd.1	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/man/man1/bibmradd.1	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -46,7 +46,7 @@
 .ie \n(.g .ds Aq \(aq
 .el       .ds Aq '
 .\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" If the F register is >0, we'll generate index entries on stderr for
 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 .\" output yourself in some meaningful fashion.
@@ -56,12 +56,12 @@
 ..
 .nr rF 0
 .if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-.    if \nF \{
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
 .        de IX
 .        tm Index:\\$1\t\\n%\t"\\$2"
 ..
-.        if !\nF==2 \{
+.        if !\nF==2 \{\
 .            nr % 0
 .            nr F 2
 .        \}
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "bibmradd 1"
-.TH bibmradd 1 "2017-11-18" "" "CROSSREF LIBRARY"
+.TH bibmradd 1 "2021-10-02" "" "LATEX CROSSREFWARE"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -174,7 +174,7 @@
 Boris Veytsman
 .SH "COPYRIGHT AND LICENSE"
 .IX Header "COPYRIGHT AND LICENSE"
-Copyright (C) 2014\-2017  Boris Veytsman
+Copyright (C) 2014\-2021  Boris Veytsman
 .PP
 This is free software.  You may redistribute copies of it under the
 terms of the \s-1GNU\s0 General Public License

Modified: trunk/Master/texmf-dist/doc/man/man1/bibmradd.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/man/man1/biburl2doi.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/biburl2doi.1	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/man/man1/biburl2doi.1	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -46,7 +46,7 @@
 .ie \n(.g .ds Aq \(aq
 .el       .ds Aq '
 .\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" If the F register is >0, we'll generate index entries on stderr for
 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 .\" output yourself in some meaningful fashion.
@@ -56,12 +56,12 @@
 ..
 .nr rF 0
 .if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-.    if \nF \{
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
 .        de IX
 .        tm Index:\\$1\t\\n%\t"\\$2"
 ..
-.        if !\nF==2 \{
+.        if !\nF==2 \{\
 .            nr % 0
 .            nr F 2
 .        \}
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "biburl2doi 1"
-.TH biburl2doi 1 "2017-11-26" "" "CROSSREF LIBRARY"
+.TH biburl2doi 1 "2021-10-02" "" "LATEX CROSSREFWARE"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -162,7 +162,7 @@
 Boris Veytsman
 .SH "COPYRIGHT AND LICENSE"
 .IX Header "COPYRIGHT AND LICENSE"
-Copyright (C) 2017  Boris Veytsman
+Copyright (C) 2021  Boris Veytsman
 .PP
 This is free software.  You may redistribute copies of it under the
 terms of the \s-1GNU\s0 General Public License

Modified: trunk/Master/texmf-dist/doc/man/man1/biburl2doi.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/man/man1/bibzbladd.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/bibzbladd.1	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/man/man1/bibzbladd.1	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -46,7 +46,7 @@
 .ie \n(.g .ds Aq \(aq
 .el       .ds Aq '
 .\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" If the F register is >0, we'll generate index entries on stderr for
 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 .\" output yourself in some meaningful fashion.
@@ -56,12 +56,12 @@
 ..
 .nr rF 0
 .if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-.    if \nF \{
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
 .        de IX
 .        tm Index:\\$1\t\\n%\t"\\$2"
 ..
-.        if !\nF==2 \{
+.        if !\nF==2 \{\
 .            nr % 0
 .            nr F 2
 .        \}
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "bibzbladd 1"
-.TH bibzbladd 1 "2018-04-15" "" "CROSSREF LIBRARY"
+.TH bibzbladd 1 "2021-10-02" "" "LATEX CROSSREFWARE"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -174,7 +174,7 @@
 Boris Veytsman
 .SH "COPYRIGHT AND LICENSE"
 .IX Header "COPYRIGHT AND LICENSE"
-Copyright (C) 2014\-2017  Boris Veytsman
+Copyright (C) 2014\-2021  Boris Veytsman
 .PP
 This is free software.  You may redistribute copies of it under the
 terms of the \s-1GNU\s0 General Public License

Modified: trunk/Master/texmf-dist/doc/man/man1/bibzbladd.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/man/man1/ltx2crossrefxml.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/ltx2crossrefxml.1	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/man/man1/ltx2crossrefxml.1	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.40)
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -46,7 +46,7 @@
 .ie \n(.g .ds Aq \(aq
 .el       .ds Aq '
 .\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" If the F register is >0, we'll generate index entries on stderr for
 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 .\" output yourself in some meaningful fashion.
@@ -56,12 +56,12 @@
 ..
 .nr rF 0
 .if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-.    if \nF \{
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
 .        de IX
 .        tm Index:\\$1\t\\n%\t"\\$2"
 ..
-.        if !\nF==2 \{
+.        if !\nF==2 \{\
 .            nr % 0
 .            nr F 2
 .        \}
@@ -133,57 +133,253 @@
 .\" ========================================================================
 .\"
 .IX Title "ltx2crossrefxml 1"
-.TH ltx2crossrefxml 1 "2017-11-18" "" "CROSSREF LIBRARY"
+.TH ltx2crossrefxml 1 "2021-10-02" "" "LATEX CROSSREFWARE"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
 .nh
 .SH "NAME"
-ltx2crossrefxml.pl \- a tool for creation of XML files for submitting to crossref.
+ltx2crossrefxml.pl \- create XML files for submitting to crossref.org
 .SH "SYNOPSIS"
 .IX Header "SYNOPSIS"
-ltx2crossrefxml [\fB\-c\fR \fIconfig_file\fR]  [\fB\-o\fR \fIoutput\fR] \fIlatex_file\fR \fIlatex_file\fR ...
+ltx2crossrefxml [\fB\-c\fR \fIconfig_file\fR]  [\fB\-o\fR \fIoutput_file\fR] [\fB\-rpi\-is\-xml\fR]
+                \fIlatex_file1\fR \fIlatex_file2\fR ...
 .SH "OPTIONS"
 .IX Header "OPTIONS"
 .IP "\fB\-c\fR \fIconfig_file\fR" 4
 .IX Item "-c config_file"
-Configuration file.  If this file is absent, some defaults are used.
+Configuration file.  If this file is absent, defaults are used.
 See below for its format.
-.IP "\fB\-o\fR \fIoutput\fR" 4
-.IX Item "-o output"
+.IP "\fB\-o\fR \fIoutput_file\fR" 4
+.IX Item "-o output_file"
 Output file.  If this option is not used, the \s-1XML\s0 is output to stdout.
+.IP "\fB\-rpi\-is\-xml\fR" 4
+.IX Item "-rpi-is-xml"
+Do not transform author and title input strings, assume they are valid \s-1XML.\s0
+.PP
+The usual \f(CW\*(C`\-\-help\*(C'\fR and \f(CW\*(C`\-\-version\*(C'\fR options are also supported. Options
+can begin with either \f(CW\*(C`\-\*(C'\fR or \f(CW\*(C`\-\-\*(C'\fR, and can be ordered arbitrarily.
 .SH "DESCRIPTION"
 .IX Header "DESCRIPTION"
-The script takes a number of latex files and produces an \s-1XML\s0 file
-ready for submission to Crossref.  Each file must be previously processed
-by LaTeX with the newest \f(CW\*(C`resphilosophica\*(C'\fR package: the package creates
-the file \f(CW\*(C`.rti\*(C'\fR wtih the information about the bibliography.
+For each given \fIlatex_file\fR, this script reads \f(CW\*(C`.rpi\*(C'\fR and (if they
+exist) \f(CW\*(C`.bbl\*(C'\fR files and outputs corresponding \s-1XML\s0 that can be uploaded
+to Crossref (<https://crossref.org>). Any extension of \fIlatex_file\fR is
+ignored, and \fIlatex_file\fR itself is not read (and need not even exist).
 .PP
-The processing of reference list is at present rather limited: only so
-called unstructured references are produced.
+Each \f(CW\*(C`.rpi\*(C'\fR file specifies the metadata for a single article to be
+uploaded to Crossref (a \f(CW\*(C`journal_article\*(C'\fR element in their schema); an
+example is below. These files are output by the \f(CW\*(C`resphilosophica\*(C'\fR
+package (<https://ctan.org/pkg/resphilosophica>), but (as always) can
+also be created by hand or by whatever other method you implement.
+.PP
+Any \f(CW\*(C`.bbl\*(C'\fR files present are used for the citation information in the
+output \s-1XML.\s0 See the \s-1CITATIONS\s0 section below.
+.PP
+Unless \f(CW\*(C`\-\-rpi\-is\-xml\*(C'\fR is specified, for all text (authors, title,
+citations), standard TeX control sequences are replaced with plain text
+or \s-1UTF\-8\s0 or eliminated, as appropriate. The \f(CW\*(C`LaTeX::ToUnicode::convert\*(C'\fR
+routine is used for this (<https://ctan.org/pkg/bibtexperllibs>).
+Tricky TeX control sequences will almost surely not be handled
+correctly. If \f(CW\*(C`\-\-rpi\-is\-xml\*(C'\fR is given, the author and title strings
+from the rpi files are output as-is, assuming they are valid \s-1XML\s0; no
+checking is done. Citation text from \f(CW\*(C`.bbl\*(C'\fR files is always converted
+from LaTeX to plain text.
+.PP
+This script just writes an \s-1XML\s0 file. It's up to you to actually do the
+uploading to Crossref; for example, you can use their Java tool 
+\&\f(CW\*(C`crossref\-upload\-tool.jar\*(C'\fR
+(<https://www.crossref.org/education/member\-setup/direct\-deposit\-xml/https\-post>).
+For the definition of their schema, see
+<https://data.crossref.org/reports/help/schema_doc/4.4.2/index.html>
+(this is the schema version currently followed by this script).
 .SH "CONFIGURATION FILE FORMAT"
 .IX Header "CONFIGURATION FILE FORMAT"
-The configuration file is mostly self-explanatory: it has comments
-(starting with \f(CW\*(C`#\*(C'\fR) and assginments in the form
+The configuration file is read as Perl code. Thus, comment lines
+starting with \f(CW\*(C`#\*(C'\fR and blank lines are ignored. The other lines are
+typically assignments in the form (spaces are optional):
 .PP
 .Vb 1
-\&   $field = value ;
+\&    $variable = value ;
 .Ve
+.PP
+Usually the value is a \f(CW"string"\fR enclosed in \s-1ASCII\s0 double-quote or
+single-quote characters, per Perl syntax. The idea is to specify the
+user-specific and journal-specific values needed for the Crossref
+upload. The variables which are used are these:
+.PP
+.Vb 7
+\&    $depositorName = "Depositor Name";
+\&    $depositorEmail = \*(Aqdepositor at example.org\*(Aq;
+\&    $registrant = \*(AqRegistrant\*(Aq;  # organization name
+\&    $fullTitle = "FULL TITLE";   # journal name
+\&    $issn = "1234\-5678";         # required
+\&    $abbrevTitle = "ABBR. TTL."; # optional
+\&    $coden = "CODEN";            # optional
+.Ve
+.PP
+For a given run, all \f(CW\*(C`.rpi\*(C'\fR data read is assumed to belong to the
+journal that is specified in the configuration file. More precisely, the
+configuration data is written as a \f(CW\*(C`journal_metadata\*(C'\fR element, with
+given \f(CW\*(C`full_title\*(C'\fR, \f(CW\*(C`issn\*(C'\fR, etc., and then each \f(CW\*(C`.rpi\*(C'\fR is written as
+\&\f(CW\*(C`journal_issue\*(C'\fR plus \f(CW\*(C`journal_article\*(C'\fR elements.
+.PP
+The configuration file can also define one Perl function:
+\&\f(CW\*(C`LaTeX_ToUnicode_convert_hook\*(C'\fR. If it is defined, it is called at the
+beginning of the procedure that converts LaTeX text to Unicode, which is
+done with the LaTeX::ToUnicode module, from the \f(CW\*(C`bibtexperllibs\*(C'\fR
+package (<https://ctan.org/pkg/bibtexperllibs>). The function must
+accept one string (the LaTeX text), and return one string (presumably
+the transformed string). The standard conversions are then applied to
+the returned string, so the configured function need only handle special
+cases, such as control sequences particular to the journal at hand.
+.SH "RPI FILE FORMAT"
+.IX Header "RPI FILE FORMAT"
+Here's the (relevant part of the) \f(CW\*(C`.rpi\*(C'\fR file corresponding to the
+\&\f(CW\*(C`rpsample.tex\*(C'\fR example in the \f(CW\*(C`resphilosophica\*(C'\fR package
+(<https://ctan.org/pkg/resphilosophica>):
+.PP
+.Vb 10
+\&  %authors=Boris Veytsman\eand A. U. Th{\eo }r\eand C. O. R\e"espondent
+\&  %title=A Sample Paper:\e\e \eemph  {A Template}
+\&  %year=2012
+\&  %volume=90
+\&  %issue=1\-\-2
+\&  %startpage=1
+\&  %endpage=1
+\&  %doi=10.11612/resphil.A31245
+\&  %paperUrl=http://borisv.lk.net/paper12
+\&  %publicationType=full_text
+.Ve
+.PP
+Other lines, some not beginning with %, are ignored (and not shown).
+For more details on processing, see the code.
+.PP
+The \f(CW%paperUrl\fR value is what will be associated with the given \f(CW%doi\fR
+(output as the \f(CW\*(C`resource\*(C'\fR element). Crossref strongly recommends that
+the url be for a so-called landing page, and not directly for a pdf
+(<https://www.crossref.org/education/member\-setup/creating\-a\-landing\-page/>).
+Special case: if the url is not specified, 
+and the journal is \fIRes\ Philosophica\fR,
+a special-purpose search url using pdcnet.org is returned.
+Any other journal must always specify this.
+.PP
+The \f(CW%authors\fR field is split at \f(CW\*(C`\eand\*(C'\fR (ignoring whitespace before
+and after), and output as the \f(CW\*(C`contributors\*(C'\fR element, using
+\&\f(CW\*(C`sequence="first"\*(C'\fR for the first listed, \f(CW\*(C`sequence="additional"\*(C'\fR for
+the remainder.
+.PP
+If the \f(CW%publicationType\fR is not specified, it defaults to
+\&\f(CW\*(C`full_text\*(C'\fR, since that has historically been the case; \f(CW\*(C`full_text\*(C'\fR
+can also be given explicitly. The other values allowed by the Crossref
+schema are \f(CW\*(C`abstract_only\*(C'\fR and \f(CW\*(C`bibliographic_record\*(C'\fR. Finally, if the
+value is \f(CW\*(C`omit\*(C'\fR, the \f(CW\*(C`publication_type\*(C'\fR attribute is omitted entirely
+from the given \f(CW\*(C`journal_article\*(C'\fR element.
+.PP
+Each \f(CW\*(C`.rpi\*(C'\fR must contain information for only one article, but multiple
+files can be read in a single run. It would not be difficult to support
+multiple articles in a single \f(CW\*(C`.rpi\*(C'\fR file, but it makes debugging and
+error correction easier when each uploaded \s-1XML\s0 contains a single
+article.
+.SS "\s-1MORE ABOUT AUTHOR NAMES\s0"
+.IX Subsection "MORE ABOUT AUTHOR NAMES"
+The three formats for names recognized are (not coincidentally) the same
+as BibTeX:
+.PP
+.Vb 3
+\&   First von Last
+\&   von Last, First
+\&   von Last, Jr., First
+.Ve
+.PP
+The forms can be freely intermixed within a single \f(CW%authors\fR line,
+separated with \f(CW\*(C`\eand\*(C'\fR (including the backslash). Commas as name
+separators are not supported, unlike BibTeX.
+.PP
+In short, you may almost always use the first form; you shouldn't if
+either there's a Jr part, or the Last part has multiple tokens but
+there's no von part. See the \f(CW\*(C`btxdoc\*(C'\fR (``BibTeXing'' by Oren Patashnik)
+document for details.
+.PP
+In the \f(CW%authors\fR line of a \f(CW\*(C`.rpi\*(C'\fR file, some secondary directives are
+recognized, indicated by \f(CW\*(C`|\*(C'\fR characters. Easiest to explain with an
+example:
+.PP
+.Vb 1
+\&  %authors=|organization|\eLaTeX\e Project Team \eand Alex Brown|orcid=123
+.Ve
+.PP
+Thus: 1) if \f(CW\*(C`|organization|\*(C'\fR is specified, the author name will be output
+as an \f(CW\*(C`organization\*(C'\fR contributor, instead of the usual \f(CW\*(C`person_name\*(C'\fR,
+as the Crossref schema requires.
+.PP
+2) If \f(CW\*(C`|orcid=\f(CIvalue\f(CW|\*(C'\fR is specified, the \fIvalue\fR is output as an
+\&\f(CW\*(C`ORCID\*(C'\fR element for that \f(CW\*(C`person_name\*(C'\fR.
+.PP
+These two directives, \f(CW\*(C`|organization|\*(C'\fR and \f(CW\*(C`|orcid|\*(C'\fR, are mutually
+exclusive, because that's how the Crossref schema defines them. The \f(CW\*(C`=\*(C'\fR
+sign after \f(CW\*(C`orcid\*(C'\fR is required, while all spaces after the \f(CW\*(C`orcid\*(C'\fR
+keyword are ignored. Other than that, the \s-1ORCID\s0 value is output
+literally. (E.g., the \s-1ORCID\s0 value of \f(CW123\fR above is clearly invalid,
+but it would be output anyway, with no warning.)
+.PP
+Extra \f(CW\*(C`|\*(C'\fR characters, at the beginning or end of the entire \f(CW%authors\fR
+string, or doubled in the middle, are accepted and ignored. Whitespace
+is ignored around all \f(CW\*(C`|\*(C'\fR characters.
+.SH "CITATIONS"
+.IX Header "CITATIONS"
+Each \f(CW\*(C`.bbl\*(C'\fR file corresponding to an input \f(CW\*(C`.rpi\*(C'\fR file is read and
+used to output a \f(CW\*(C`citation_list\*(C'\fR element for that \f(CW\*(C`journal_article\*(C'\fR in
+the output \s-1XML.\s0 If no \f(CW\*(C`.bbl\*(C'\fR file exists for a given \f(CW\*(C`.rpi\*(C'\fR,
+no \f(CW\*(C`citation_list\*(C'\fR is output for that article.
+.PP
+The \f(CW\*(C`.bbl\*(C'\fR processing is rudimentary: only so-called
+\&\f(CW\*(C`unstructured_citation\*(C'\fR references are produced for Crossref, that is,
+the contents of the citation (each paragraph in the \f(CW\*(C`.bbl\*(C'\fR) is dumped
+as a single flat string without markup.
+.PP
+Bibliography text is unconditionally converted from TeX to \s-1XML,\s0 via the
+method described above. It is not unusual for the conversion to be
+incomplete or incorrect.  It is up to you to check for this; e.g., if
+any backslashes remain in the output, it is most likely an error.
+.PP
+Furthermore, it is assumed that the \f(CW\*(C`.bbl\*(C'\fR file contains a sequence of
+references, each starting with \f(CW\*(C`\ebibitem{\f(CIKEY\f(CW}\*(C'\fR (which itself must be
+at the beginning of a line, preceded only by whitespace), and the whole
+bibliography ending with \f(CW\*(C`\eend{thebibliography}\*(C'\fR (similarly at the
+beginning of a line). A bibliography not following this format will not
+produce useful results. Bibliographies can be created by hand, or with
+BibTeX, or any other method.
+.PP
+The \f(CW\*(C`key\*(C'\fR attribute for the \f(CW\*(C`citation\*(C'\fR element is taken as the \fI\s-1KEY\s0\fR
+argument to the \f(CW\*(C`\ebibitem\*(C'\fR command. The sequential number of the
+citation (1, 2, ...) is appended. The argument to \f(CW\*(C`\ebibitem\*(C'\fR can be
+empty (\f(CW\*(C`\ebibitem{}\*(C'\fR), and the sequence number will be used on its own.
+Although TeX will not handle empty \f(CW\*(C`\ebibitem\*(C'\fR keys, it can be
+convenient when creating a \f(CW\*(C`.bbl\*(C'\fR purely for Crossref.
+.PP
+The \f(CW\*(C`.rpi\*(C'\fR file is also checked for the bibliography information, in
+this same format.
+.PP
+Feature request: if anyone is interested in figuring out how to generate
+structured citations
+(<https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#citation>)
+instead of these flat text dumps, that would be great.
 .SH "EXAMPLES"
 .IX Header "EXAMPLES"
-.Vb 1
-\&  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex \-o result.xml
+.Vb 2
+\&  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex \e
+\&                      \-o result.xml
 \&
-\&  ltx2crossrefxml.pl \-c myconfig.cnf paper.tex \-o paper.xml
+\&  ltx2crossrefxml.pl \-c myconfig.cfg paper.tex \-o paper.xml
 .Ve
 .SH "AUTHOR"
 .IX Header "AUTHOR"
-Boris Veytsman
+Boris Veytsman <https://github.com/borisveytsman/crossrefware>
 .SH "COPYRIGHT AND LICENSE"
 .IX Header "COPYRIGHT AND LICENSE"
-Copyright (C) 2012\-2016  Boris Veytsman
+Copyright (C) 2012\-2021  Boris Veytsman
 .PP
 This is free software.  You may redistribute copies of it under the
 terms of the \s-1GNU\s0 General Public License
-<http://www.gnu.org/licenses/gpl.html>.  There is \s-1NO WARRANTY,\s0 to the
+<https://www.gnu.org/licenses/gpl.html>.  There is \s-1NO WARRANTY,\s0 to the
 extent permitted by law.

Modified: trunk/Master/texmf-dist/doc/man/man1/ltx2crossrefxml.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/support/crossrefware/Makefile
===================================================================
--- trunk/Master/texmf-dist/doc/support/crossrefware/Makefile	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/support/crossrefware/Makefile	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,8 +1,10 @@
+# Makefile for the (La)TeX crossrefware package. Public domain.
+
 SCRIPTS = \
 	ltx2crossrefxml.pl \
 	bibdoiadd.pl \
+	bibmradd.pl \
 	bibzbladd.pl \
-	bibmradd.pl \
 	biburl2doi.pl \
 	bbl2bib.pl
 
@@ -15,17 +17,16 @@
 all:  ${MAN1} ${PDF}
 	chmod a+x ${SCRIPTS}
 
+check:
+	./ltx2crossrefxml.pl --help
+	./ltx2crossrefxml.pl --version
 
-
 %.1: %.pl
-	pod2man -c "CROSSREF LIBRARY" -n $* -s 1 -r "" $< > $@
+	pod2man -c "LATEX CROSSREFWARE" -n $* -s 1 -r "" $< > $@
 
 
-
-
 clean:
-	$(RM) *.aux *.toc *.log *.tex *.idx *.ilg *.ind *.out *.zip *.tgz \
-	*~
+	$(RM) *.aux *.toc *.log *.tex *.idx *.ilg *.ind *.out *.zip *.tgz *~
 
 distclean: clean
 	$(RM) *.pdf *.1 *.3
@@ -42,4 +43,7 @@
 	pod2latex -modify -full -prefile head.ltx -out $@ $+
 
 archive: all clean
-	COPYFILE_DISABLE=1 tar -C .. -czvf ../$(PACKAGE).tgz --exclude '*~' --exclude '*.tgz' --exclude '*.zip'  --exclude CVS --exclude '.git*' $(PACKAGE); mv ../$(PACKAGE).tgz .
+	COPYFILE_DISABLE=1 tar -C .. -czvf ../$(PACKAGE).tgz --exclude '*~' \
+	  --exclude '*.tgz' --exclude '*.zip'  --exclude CVS \
+	  --exclude '.git*' $(PACKAGE)
+	mv ../$(PACKAGE).tgz .

Modified: trunk/Master/texmf-dist/doc/support/crossrefware/README
===================================================================
--- trunk/Master/texmf-dist/doc/support/crossrefware/README	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/support/crossrefware/README	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,55 +1,61 @@
 			 Crossrefware Bundle
-			  version 2017/11/26
+			  version 2021-10-02
 
-
 Scripts useful for working with Crossref, MathSciNet and Zentralblatt MATH.
 
-This work was commissioned by The Saint Lois University and The
-Princeton University (Mathematics Department)
-
 bibdoiadd.pl       - add DOI numbers to papers in a given bib file
 bibzbladd.pl       - add Zbl numbers to papers in a given bib file
 bibmradd.pl        - add MR  numbers to papers in a given bib file
 bbl2bib.pl         - convert `thebibliography' environment to a bib file
 biburl2doi.pl      - convert urls pointing to doi.org to dois
-ltx2crossrefxml.pl - a tool for creation of XML files for submitting to crossref.org
+ltx2crossrefxml.pl - create XML files for submission to crossref.org
 
-The scripts use bibtexperllibs libraries from CTAN
+Bug reports, source code: https://github.com/borisveytsman/crossrefware
+Releases: https://ctan.org/pkg/crossrefware
 
+These scripts rely on the bibtexperllibs libraries (replace
+/crossrefware with /bibtexperllibs in the above urls).
+
+For an example of using ltx2crossrefxml and associated code, see the
+TUGboat processing at
+https://github.com/TeXUsersGroup/tugboat/tree/trunk/capsules/crossref.
+
 Installation:
-
 1.  Move *.pl files to the binaries directory in your system.
-
 2.  Use *.cfg files as configuration files samples.
-
 3.  Move *.1 to the man pages directory in your system.
 
+This work was commissioned by The Saint Louis University and
+Princeton University (Mathematics Department). Thank you!
 
 Changes:
 
-	2018/04/15    - Fixed a bug in bibzbladd
-		      - Work around a bug in mathscinet
+2021-10-02    - changed TEXSELFAUTOPARENT to TEXMFROOT
 
-	2017/11/26:   - Switched to date-based versioning for the
-		        bundle. 
-                      - bbl2bib now converts urls starting with
-	                http(s)://(dx.)doi.org to dois.  This functionality
-		        is also provided by a separate script biburl2doi
-		      - bibadddoi now has the option to (not) canonize
-		        names
+2021-01-18    - all conversions moved from ltx2crossrefxml to LaTeX::ToUnicode.
+	      - config file can provide a hook for processing.
 
-	Version 2.2:  We now treat absent CA permissively.
-		      New debug options for bbl2bib
+2018/04/15    - Fixed a bug in bibzbladd
+	      - Work around a bug in mathscinet
 
-        Version 2.1a: Bug in bbl2bib corrected
+2017/11/26    - Switched to date-based versioning for the bundle. 
+              - bbl2bib now converts urls starting with
+                http(s)://(dx.)doi.org to dois.  This functionality
+	        is also provided by a separate script biburl2doi
+	      - bibdoiadd now has the option to (not) canonize names
 
-        Version 2.1:  New options to add empty doi and zbl when doi or zbl
-		      are not found to prevent repeated searches.
-		      Added bibmradd.pl
-		      Added bbl2bib.pl
+Version 2.2:  We now treat absent CA permissively.
+	      New debug options for bbl2bib
 
-        Version 2.0:  Moved to BibTeX::Parser suite.
-                      Now we use new Zbmath interface.
-                      Now we use bibtexperllibs libraries
+Version 2.1a: Bug in bbl2bib corrected
 
-        Version 1.1:  workaround for a bug with macrons in TeX::Encode
\ No newline at end of file
+Version 2.1:  New options to add empty doi and zbl when doi or zbl
+	      are not found to prevent repeated searches.
+	      Added bibmradd.pl
+	      Added bbl2bib.pl
+
+Version 2.0:  Moved to BibTeX::Parser suite.
+              Now we use new Zbmath interface.
+              Now we use bibtexperllibs libraries
+
+Version 1.1:  workaround for a bug with macrons in TeX::Encode

Modified: trunk/Master/texmf-dist/doc/support/crossrefware/crossrefware.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/support/crossrefware/head.ltx
===================================================================
--- trunk/Master/texmf-dist/doc/support/crossrefware/head.ltx	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/doc/support/crossrefware/head.ltx	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,13 +1,11 @@
-\documentclass{article}
-\usepackage{makeidx}
-\usepackage[osf]{mathpazo}
+\documentclass[11pt]{article}
+\usepackage{makeidx,fullpage}
 \usepackage[hidelinks]{hyperref}
 \makeindex
 \begin{document}
 \sloppy
 \title{Crossrefware documentation\thanks{This work was commissioned by
-  The Saint Lois University and The Princeton University (Mathematics
-  Department)}}
+  Saint Louis University and Princeton University (Mathematics Department)}}
 \author{Boris Veytsman\thanks{borisv at lk.net, boris at varphi.com}}
 \maketitle
 \tableofcontents
@@ -14,26 +12,39 @@
 
 \section{Introduction}
 
-These scripts can be used to submit files to Crossref, check and add
-doi numbers, MathSciNet numbers and ZbMath numbers to papers, and to
-convert `bbl' files to `bib' files.
+These scripts can be used to create files for submission to Crossref,
+check and add doi numbers, MathSciNet numbers and ZbMath numbers to
+papers, and to convert `bbl' files to `bib' files.
 
-I am grateful to Josko Plazonic from Princeton Math Dept whose
-(unpublished) Python script was an inspiration for this suite.
+Development sources and issue tracker are on github:
+\url{https://github.com/borisveytsman/crossrefware}.
+Releases are made on CTAN:
+\url{https://ctan.org/pkg/crossrefware}
+and from there included in \TeX\ Live and other distributions.
 
-The script \path{ltx2crossrefxml} is used to extract the information
-from a \LaTeX\ file and generate an XML file suitable for submission
-to Crossref (the organization that keeps DOI numbering system).  
+The script \path{ltx2crossrefxml} extracts information from \path{.rpi}
+files and (if present) \path{.bbl} files and generates an XML file
+suitable for submission to crossref.org. (Crossref is the organization
+that handles DOI numbers for scholarly papers.) It does not actually
+upload the submission, just outputs XML.
 
-Several scripts, \path{bibdoiadd}, \path{bibmradd} and \path{bibmradd}
+This \path{.rpi} file is a plain text representation of the metadata for
+one article. It is written by the \path{resphilosophica} package
+(\url{https://ctan.org/pkg/resphilosophica}). It can also be created by
+hand.
+
+Several scripts, \path{bibdoiadd}, \path{bibmradd} and \path{bibzbladd}
 take a \path{bib} file, and add to each entry a DOI, MR or ZBL number
 correspondingly, if they can find this entry in the corresponding
 database.   
 
-Script \path{bbl2bib} tries to reconstruct a \path{bib} file from the
+The \path{bbl2bib} script tries to reconstruct a \path{bib} file from the
 corresponding \path{thebibliography} environment.  One can argue that
-this operation is akin to reconstructing a cow from the steak.  The
-way the script does it is searching for the entry in the MR database,
+this operation is akin to reconstructing the cow from a steak.  The
+way the script does it is by searching for the entry in the MR database,
 and creating the corresponding Bib\TeX\ fields.
 
-Below are manual pages for these scripts.  
+I am grateful to Josko Plazonic from Princeton Math Dept whose
+(unpublished) Python script was an inspiration for this suite.
+
+Following are manual pages for these scripts.  

Modified: trunk/Master/texmf-dist/scripts/crossrefware/bbl2bib.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/crossrefware/bbl2bib.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/scripts/crossrefware/bbl2bib.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -86,7 +86,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -98,7 +98,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Master/texmf-dist/scripts/crossrefware/bibdoiadd.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/crossrefware/bibdoiadd.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/scripts/crossrefware/bibdoiadd.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -85,7 +85,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -97,7 +97,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Master/texmf-dist/scripts/crossrefware/bibmradd.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/crossrefware/bibmradd.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/scripts/crossrefware/bibmradd.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -52,7 +52,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -64,7 +64,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Master/texmf-dist/scripts/crossrefware/biburl2doi.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/crossrefware/biburl2doi.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/scripts/crossrefware/biburl2doi.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -39,7 +39,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2017  Boris Veytsman
+Copyright (C) 2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -51,7 +51,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Master/texmf-dist/scripts/crossrefware/bibzbladd.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/crossrefware/bibzbladd.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/scripts/crossrefware/bibzbladd.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -52,7 +52,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2014-2017  Boris Veytsman
+Copyright (C) 2014-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
@@ -64,7 +64,7 @@
 use strict;
 BEGIN {
     # find files relative to our installed location within TeX Live
-    chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+    chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
     if (length($TLMaster)) {
 	unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
     }

Modified: trunk/Master/texmf-dist/scripts/crossrefware/ltx2crossrefxml.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/crossrefware/ltx2crossrefxml.pl	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/scripts/crossrefware/ltx2crossrefxml.pl	2021-10-03 20:32:45 UTC (rev 60689)
@@ -4,13 +4,13 @@
 
 =head1 NAME
 
-ltx2crossrefxml.pl - a tool for creation of XML files for submitting to crossref.
+ltx2crossrefxml.pl - create XML files for submitting to crossref.org
 
 =head1 SYNOPSIS
 
-ltx2crossrefxml [B<-c> I<config_file>]  [B<-o> I<output>] I<latex_file> I<latex_file> ...
+ltx2crossrefxml [B<-c> I<config_file>]  [B<-o> I<output_file>] [B<-rpi-is-xml>]
+                I<latex_file1> I<latex_file2> ...
 
-
 =head1 OPTIONS
 
 =over 4
@@ -17,89 +17,326 @@
 
 =item B<-c> I<config_file>
 
-Configuration file.  If this file is absent, some defaults are used.
+Configuration file.  If this file is absent, defaults are used.
 See below for its format.
 
+=item B<-o> I<output_file>
 
-=item B<-o> I<output>
-
 Output file.  If this option is not used, the XML is output to stdout.
 
+=item B<-rpi-is-xml>
+
+Do not transform author and title input strings, assume they are valid XML.
+
 =back
 
+The usual C<--help> and C<--version> options are also supported. Options
+can begin with either C<-> or C<-->, and can be given in any order.
+
 =head1 DESCRIPTION
 
-The script takes a number of latex files and produces an XML file
-ready for submission to Crossref.  Each file must be previously processed
-by LaTeX with the newest C<resphilosophica> package: the package creates
-the file C<.rti> wtih the information about the bibliography.
+For each given I<latex_file>, this script reads C<.rpi> and (if they
+exist) C<.bbl> files and outputs corresponding XML that can be uploaded
+to Crossref (L<https://crossref.org>). Any extension of I<latex_file> is
+ignored, and I<latex_file> itself is not read (and need not even exist).
 
-The processing of reference list is at present rather limited: only so
-called unstructured references are produced.
+Each C<.rpi> file specifies the metadata for a single article to be
+uploaded to Crossref (a C<journal_article> element in their schema); an
+example is below. These files are output by the C<resphilosophica>
+package (L<https://ctan.org/pkg/resphilosophica>), but (as always) can
+also be created by hand or by whatever other method you implement.
 
+Any C<.bbl> files present are used for the citation information in the
+output XML. See the L<CITATIONS> section below.
+
+Unless C<--rpi-is-xml> is specified, for all text (authors, title,
+citations), standard TeX control sequences are replaced with plain text
+or UTF-8 or eliminated, as appropriate. The C<LaTeX::ToUnicode::convert>
+routine is used for this (L<https://ctan.org/pkg/bibtexperllibs>).
+Tricky TeX control sequences will almost surely not be handled
+correctly. If C<--rpi-is-xml> is given, the author and title strings
+from the rpi files are output as-is, assuming they are valid XML; no
+checking is done. Citation text from C<.bbl> files is always converted
+from LaTeX to plain text.
+
+This script just writes an XML file. It's up to you to actually do the
+uploading to Crossref; for example, you can use their Java tool 
+C<crossref-upload-tool.jar>
+(L<https://www.crossref.org/education/member-setup/direct-deposit-xml/https-post>).
+For the definition of their schema, see
+L<https://data.crossref.org/reports/help/schema_doc/4.4.2/index.html>
+(this is the schema version currently followed by this script).
+
 =head1 CONFIGURATION FILE FORMAT
 
-The configuration file is mostly self-explanatory: it has comments
-(starting with C<#>) and assginments in the form
+The configuration file is read as Perl code. Thus, comment lines
+starting with C<#> and blank lines are ignored. The other lines are
+typically assignments in the form (spaces are optional):
 
-   $field = value ;
+    $variable = value ;
 
+Usually the value is a C<"string"> enclosed in ASCII double-quote or
+single-quote characters, per Perl syntax. The idea is to specify the
+user-specific and journal-specific values needed for the Crossref
+upload. The variables which are used are these:
+
+    $depositorName = "Depositor Name";
+    $depositorEmail = 'depositor at example.org';
+    $registrant = 'Registrant';  # organization name
+    $fullTitle = "FULL TITLE";   # journal name
+    $issn = "1234-5678";         # required
+    $abbrevTitle = "ABBR. TTL."; # optional
+    $coden = "CODEN";            # optional
+
+
+For a given run, all C<.rpi> data read is assumed to belong to the
+journal that is specified in the configuration file. More precisely, the
+configuration data is written as a C<journal_metadata> element, with
+given C<full_title>, C<issn>, etc., and then each C<.rpi> is written as
+C<journal_issue> plus C<journal_article> elements.
+
+The configuration file can also define one Perl function:
+C<LaTeX_ToUnicode_convert_hook>. If it is defined, it is called at the
+beginning of the procedure that converts LaTeX text to Unicode, which is
+done with the L<LaTeX::ToUnicode> module, from the C<bibtexperllibs>
+package (L<https://ctan.org/pkg/bibtexperllibs>). The function must
+accept one string (the LaTeX text), and return one string (presumably
+the transformed string). The standard conversions are then applied to
+the returned string, so the configured function need only handle special
+cases, such as control sequences particular to the journal at hand.
+
+=head1 RPI FILE FORMAT
+
+Here's the (relevant part of the) C<.rpi> file corresponding to the
+C<rpsample.tex> example in the C<resphilosophica> package
+(L<https://ctan.org/pkg/resphilosophica>):
+
+  %authors=Boris Veytsman\and A. U. Th{\o }r\and C. O. R\"espondent
+  %title=A Sample Paper:\\ \emph  {A Template}
+  %year=2012
+  %volume=90
+  %issue=1--2
+  %startpage=1
+  %endpage=1
+  %doi=10.11612/resphil.A31245
+  %paperUrl=http://borisv.lk.net/paper12
+  %publicationType=full_text
+
+Other lines, some not beginning with %, are ignored (and not shown).
+For more details on processing, see the code.
+
+The C<%paperUrl> value is what will be associated with the given C<%doi>
+(output as the C<resource> element). Crossref strongly recommends that
+the url be for a so-called landing page, and not directly for a pdf
+(L<https://www.crossref.org/education/member-setup/creating-a-landing-page/>).
+Special case: if the url is not specified, 
+and the journal is I<S<Res Philosophica>>,
+a special-purpose search url using L<pdcnet.org> is returned.
+Any other journal must always specify this.
+
+The C<%authors> field is split at C<\and> (ignoring whitespace before
+and after), and output as the C<contributors> element, using
+C<sequence="first"> for the first listed, C<sequence="additional"> for
+the remainder.
+
+If the C<%publicationType> is not specified, it defaults to
+C<full_text>, since that has historically been the case; C<full_text>
+can also be given explicitly. The other values allowed by the Crossref
+schema are C<abstract_only> and C<bibliographic_record>. Finally, if the
+value is C<omit>, the C<publication_type> attribute is omitted entirely
+from the given C<journal_article> element.
+
+Each C<.rpi> must contain information for only one article, but multiple
+files can be read in a single run. It would not be difficult to support
+multiple articles in a single C<.rpi> file, but it makes debugging and
+error correction easier when each uploaded XML contains a single
+article.
+
+=head2 MORE ABOUT AUTHOR NAMES
+
+The three formats for names recognized are (not coincidentally) the same
+as BibTeX:
+
+   First von Last
+   von Last, First
+   von Last, Jr., First
+   
+The forms can be freely intermixed within a single C<%authors> line,
+separated with C<\and> (including the backslash). Commas as name
+separators are not supported, unlike BibTeX.
+
+In short, you may almost always use the first form; you shouldn't if
+either there's a Jr part, or the Last part has multiple tokens but
+there's no von part. See the C<btxdoc> (``BibTeXing'' by Oren Patashnik)
+document for details.
+
+In the C<%authors> line of a C<.rpi> file, some secondary directives are
+recognized, indicated by C<|> characters. Easiest to explain with an
+example:
+
+  %authors=|organization|\LaTeX\ Project Team \and Alex Brown|orcid=123
+
+Thus: 1) if C<|organization|> is specified, the author name will be output
+as an C<organization> contributor, instead of the usual C<person_name>,
+as the Crossref schema requires.
+
+2) If C<|orcid=I<value>|> is specified, the I<value> is output as an
+C<ORCID> element for that C<person_name>.
+
+These two directives, C<|organization|> and C<|orcid|>, are mutually
+exclusive, because that's how the Crossref schema defines them. The C<=>
+sign after C<orcid> is required, while all spaces after the C<orcid>
+keyword are ignored. Other than that, the ORCID value is output
+literally. (E.g., the ORCID value of C<123> above is clearly invalid,
+but it would be output anyway, with no warning.)
+
+Extra C<|> characters, at the beginning or end of the entire C<%authors>
+string, or doubled in the middle, are accepted and ignored. Whitespace
+is ignored around all C<|> characters.
+
+=head1 CITATIONS
+
+Each C<.bbl> file corresponding to an input C<.rpi> file is read and
+used to output a C<citation_list> element for that C<journal_article> in
+the output XML. If no C<.bbl> file exists for a given C<.rpi>,
+no C<citation_list> is output for that article.
+
+The C<.bbl> processing is rudimentary: only so-called
+C<unstructured_citation> references are produced for Crossref, that is,
+the contents of the citation (each paragraph in the C<.bbl>) is dumped
+as a single flat string without markup.
+
+Bibliography text is unconditionally converted from TeX to XML, via the
+method described above. It is not unusual for the conversion to be
+incomplete or incorrect.  It is up to you to check for this; e.g., if
+any backslashes remain in the output, it is most likely an error.
+
+Furthermore, it is assumed that the C<.bbl> file contains a sequence of
+references, each starting with C<\bibitem{I<KEY>}> (which itself must be
+at the beginning of a line, preceded only by whitespace), and the whole
+bibliography ending with C<\end{thebibliography}> (similarly at the
+beginning of a line). A bibliography not following this format will not
+produce useful results. Bibliographies can be created by hand, or with
+BibTeX, or any other method.
+
+The C<key> attribute for the C<citation> element is taken as the I<KEY>
+argument to the C<\bibitem> command. The sequential number of the
+citation (1, 2, ...) is appended. The argument to C<\bibitem> can be
+empty (C<\bibitem{}>), and the sequence number will be used on its own.
+Although TeX will not handle empty C<\bibitem> keys, it can be
+convenient when creating a C<.bbl> purely for Crossref.
+
+The C<.rpi> file is also checked for the bibliography information, in
+this same format.
+
+Feature request: if anyone is interested in figuring out how to generate
+structured citations
+(L<https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#citation>)
+instead of these flat text dumps, that would be great.
+
 =head1 EXAMPLES
 
-  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex -o result.xml
+  ltx2crossrefxml.pl ../paper1/paper1.tex ../paper2/paper2.tex \
+                      -o result.xml
 
-  ltx2crossrefxml.pl -c myconfig.cnf paper.tex -o paper.xml
+  ltx2crossrefxml.pl -c myconfig.cfg paper.tex -o paper.xml
 
 =head1 AUTHOR
 
-Boris Veytsman
+Boris Veytsman L<https://github.com/borisveytsman/crossrefware>
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2012-2016  Boris Veytsman
+Copyright (C) 2012-2021  Boris Veytsman
 
 This is free software.  You may redistribute copies of it under the
 terms of the GNU General Public License
-L<http://www.gnu.org/licenses/gpl.html>.  There is NO WARRANTY, to the
+L<https://www.gnu.org/licenses/gpl.html>.  There is NO WARRANTY, to the
 extent permitted by law.
 
-
 =cut
 
  use strict;
+ use warnings;
 
+ use Cwd;
+ use File::Basename;
+ use File::Spec;
+
  BEGIN {
      # find files relative to our installed location within TeX Live
-     chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
+     chomp(my $TLMaster = `kpsewhich -var-value=TEXMFROOT`); # TL root
      if (length($TLMaster)) {
 	 unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
      }
+     # find development bibtexperllibs in sibling checkout to this script,
+     # even if $0 is a symlink. All irrelevant when using from an installation.
+     my $real0 = Cwd::abs_path($0);
+     my $scriptdir = File::Basename::dirname($real0);
+     my $dev_btxperllibs = Cwd::abs_path("$scriptdir/../bibtexperllibs");
+     # we need the lib/ subdirectories inside ...
+     unshift (@INC, glob ("$dev_btxperllibs/*/lib")) if -d $dev_btxperllibs;
  }
+
  use POSIX qw(strftime);
+
  use BibTeX::Parser::Author;
- use LaTeX::ToUnicode qw (convert);
- use File::Basename;
- use File::Spec;
- my $USAGE="USAGE: $0 [-c config] [-o output] file1 file2 ...\n";
-my $VERSION = <<END;
-ltx2crossrefxml v2.2
-This is free software.  You may redistribute copies of it under the
-terms of the GNU General Public License
-http://www.gnu.org/licenses/gpl.html.  There is NO WARRANTY, to the
-extent permitted by law.
-$USAGE
+ use LaTeX::ToUnicode;
+
+ my $USAGE = <<END;
+Usage: $0 [-c CONFIG] [-o OUTPUT] [--rpi-is-xml] LTXFILE...
+
+Convert .rpi and (if any are present) .bbl files corresponding to each
+LTXFILE to xml, for submitting to crossref.org. The LTXFILE is not read
+(and need not even exist); any extension it has is replaced by .rpi and
+.bbl.
+
+The .rpi files are plain text, with values on lines beginning with %, as
+output by (for example) the resphilosophica LaTeX package. The .bbl
+files are as output by BibTeX. Both are also commonly created by hand.
+The documentation for this script has examples.
+
+The xml is written to standard output by default; the -o (--output)
+option overrides this.
+
+If the -c (--config) option is given, the given file is read before any
+processing is done. This is used to define journal-specific defaults.
+
+The usual --help and --version options are also supported.
+
+For an example of using this script and associatd code, see the TUGboat
+processing at
+https://github.com/TeXUsersGroup/tugboat/tree/trunk/capsules/crossref.
+
+Development sources, bug tracker: https://github.com/borisveytsman/crossrefware
+Releases: https://ctan.org/pkg/crossrefware
 END
- use Getopt::Std;
+
+ my $VERSION = <<END;
+ltx2crossrefxml (crossrefware) 2.51
+This is free software: you are free to change and redistribute it, under
+the terms of the GNU General Public License
+http://www.gnu.org/licenses/gpl.html (any version).
+There is NO WARRANTY, to the extent permitted by law.
+
+Written by Boris Veytsman.
+END
+ use Getopt::Long;
  my %opts;
- getopts('c:o:hV',\%opts) or die $USAGE;
+
+ GetOptions(
+   "config|c=s" => \($opts{c}),
+   "output|o=s" => \($opts{o}),
+   "rpi-is-xml!"=> \($opts{xi}),
+   "version|V"  => \($opts{V}),
+   "help|?"     => \($opts{h})) || pod2usage(1);
+
+ if ($opts{h}) { print "$USAGE\n$VERSION"; exit 0; } 
+ if ($opts{V}) { print $VERSION; exit 0; } 
+
  use utf8;
  binmode(STDOUT, ":utf8");
 
-if ($opts{h} || $opts{V}){
-    print $VERSION;
-    exit 0;
-}
-
  ################################################################
  # Defaults and parameters
  ################################################################
@@ -107,32 +344,36 @@
  *OUT=*STDOUT;
  
  if (defined($opts{o})) {
-     open (OUT, ">$opts{o}") or die "Cannot open file $opts{o} for writing\n";
+     open (OUT, ">$opts{o}") or die "open($opts{o}) for writing failed: $!\n";
      binmode(OUT, ":utf8")
  }
 
 
- our $depositorName='DEPOSITOR_NAME';
- our $depositorEmail='DEPOSITOR_EMAIL';
- our $registrant='REGISTRANT';
+ our $depositorName = 'DEPOSITOR_NAME';
+ our $depositorEmail = 'DEPOSITOR_EMAIL';
+ our $registrant = 'REGISTRANT';
  our $fullTitle = "FULL TITLE";
- our $abbrevTitle = "ABBR. Title.";
- our $issn = "1234-5678";
+ our $abbrevTitle = "ABBR. TTL.";
+ our $issn = "0000-0000";
  our $coden = "CODEN";
- our $batchId="ltx2crossref$$";
- our $timestamp=strftime("%Y%m%d%H%M%S", gmtime);
+ our $timestamp = strftime("%Y%m%d%H%M%S", gmtime);
+ # use timestamp in batchid, since the value is supposed to be unique
+ # for every submission to crossref by a given publisher.
+ # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#doi_batch_id
+ our $batchId="ltx2crossref-$timestamp-$$";
 
 
  if ($opts{c}) {
      if (-r $opts{c}) {
-	 require $opts{c};
+         # if config arg is absolute, fine; if not, prepend "./" as slightly
+         # less troublesome than putting "." in the @INC path.
+         my $rel = (File::Spec->file_name_is_absolute($opts{c}) ? "" : "./");
+	 require "$rel$opts{c}";
      } else {
-	 die "Cannot read options $opts{c}.  $USAGE";
+	 die "Cannot read config file $opts{c}. Goodbye.";
      }
  }
 
-
-
  PrintHead();
 
  # 
@@ -154,11 +395,9 @@
 	     }
 	 }
      }
-
  }
 
  PrintTail();
-
  exit(0);
 
 
@@ -165,106 +404,150 @@
 #####################################################
 #  Printing the head and the tail
 #####################################################
-
 sub PrintHead {
+    # do not output the <coden> or <abbrev_title> if the journal doesn't
+    # have them.
+    my $indent = "        ";
+    my $coden_out = $coden ne "CODEN" ? "\n$indent<coden>$coden</coden>" : "";
+    my $abbrev_title_out = $abbrevTitle ne "ABBR. TTL."
+        ? "\n$indent<abbrev_title>$abbrevTitle</abbrev_title>"
+        : "";
 
-
+    # as of schema version 4.3.4, crossref renamed the <name> element
+    # inside <depositor> to <depositor_name>. Sigh. Something to take
+    # into account with older schemas.
+    # https://www.crossref.org/education/content-registration/crossrefs-metadata-deposit-schema/schema-versions/
     print OUT <<END;
-<doi_batch xmlns="http://www.crossref.org/schema/4.3.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="4.3.0" xsi:schemaLocation="http://www.crossref.org/schema/4.3.0 http://www.crossref.org/schema/deposit/crossref4.3.0.xsd">
+<doi_batch xmlns="http://www.crossref.org/schema/4.4.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="4.4.2" xsi:schemaLocation="http://www.crossref.org/schema/4.4.2 http://www.crossref.org/schema/deposit/crossref4.4.2.xsd">
   <head>
     <doi_batch_id>$batchId</doi_batch_id>
     <timestamp>$timestamp</timestamp>
     <depositor>
-      <name>$depositorName</name>
+      <depositor_name>$depositorName</depositor_name>
       <email_address>$depositorEmail</email_address>
     </depositor>
     <registrant>$registrant</registrant>
   </head>
-  <body>
-    <journal>
-      <journal_metadata language="en">
-        <full_title>$fullTitle</full_title>
-        <abbrev_title>$abbrevTitle</abbrev_title>
-	<issn>$issn</issn>
-	<coden>$coden</coden>
-      </journal_metadata>
+  <body><journal>
+    <journal_metadata language="en">
+      <full_title>$fullTitle</full_title>$abbrev_title_out
+      <issn>$issn</issn>$coden_out	
+    </journal_metadata>
 END
-
 }
 
 sub PrintTail {
     print OUT <<END;
-    </journal>
-  </body>
+  </journal></body>
 </doi_batch>
 END
 
-return;
+    return;
 }
 
 
 #######################################################
-#  Adding one paper
+#  Adding one paper from $file.rpi and .bbl to global %papers.
 #######################################################
-
 sub AddPaper {
     my $file = shift;
     my ($name,$path,$suffix) = fileparse($file, '\.[^\.]*$');
     my $rpifile = File::Spec->catfile($path, "$name.rpi");
-    open (RPI, $rpifile) or die 
-     "Cannot find $rpifile.  Did you process $file?\n";
+    open (RPI, $rpifile)
+      or die "open($rpifile) failed: $! (did you process $file?)\n";
     my %data;
     while (<RPI>) {
 	chomp;
         if (/^%([^=]*)\s*=\s*(.*)\s*$/) {
-           $data{$1}=$2;
+           if (exists $data{$1}) {
+             warn "$rpifile:$.: already saw data{$1}=$data{$1};"
+                  . " an .rpi file should have data for only one article,"
+                  . " but overwriting with `$2' anyway.\n";
+           }
+           $data{$1} = $2;
         }
     }
     close RPI;
+    
+    # look for bibliographies in both the .rpi and any .bbl file.
     my @bibliography;
     foreach my $bibfile ($file, File::Spec->catfile($path, "$name.bbl")) {
-         @bibliography = (@bibliography, 
-          AddBibliography($bibfile));
+         @bibliography = (@bibliography, AddBibliography($bibfile));
     }
-    $data{'bibliography'}=\@bibliography;
+    $data{'bibliography'} = \@bibliography;
+
+    # Die if the fields we use unconditionally are empty. Not all of
+    # them are required by the schema, but we can wait to generalize.
+    foreach my $field (qw(title year volume issue startpage endpage doi)) {
+        if (! $data{$field}) {
+            die ("$0: field must not be empty: $field\n  "
+                 . &debug_hash_as_string("whole hash", %data));
+        }
+    }
+
     push @{$papers{$data{year}}->{$data{volume}}->{$data{issue}}}, \%data;
 }
 
 ############################################################## 
-# Reading a list of papers and adding  it to the
-# bibliography
+# Reading a list of papers from BIBFILE and adding it to the
+# bibliography. Each item is assumed to start with
+# \bibitem{KEY} and the whole bib to end with \end{thebibliography}.
+# 
+# We return a list of hashes, each hash with a single key, the citation
+# key, and its value a flat string of the entry.
+# 
+# No conversion of the text is done here.
 ##############################################################
-
 sub AddBibliography {
     my $bibfile = shift;
     open (BIB, $bibfile) or return;
+    
     my $insidebibliography = 0;
-    my $currpaper="";
+    my $currpaper = ""; # that is, the current bib entry
+    my $bibno = 0;
     my @result;
     my $key;
     while (<BIB>) {
 	chomp;
-	if (/^\s*\\bibitem(?:\[.*\])?+\{(.+)\}/) {
+	next if /^\s*%/; # TeX comment line
+	s/[ \t]%.*//;    # remove TeX comment
+	#
+	# allow empty \bibitem key for the sake of handwritten bbls.
+	# Similarly, might be more stuff on the line when handwritten.
+	if (s/^\s*\\bibitem(?:\[.*?\])?+\s*\{(.*?)\}//) {
+	    my $newkey = $1;
 	    if ($insidebibliography) {
 		if ($currpaper) {
-		    my %paperhash;
-		    $paperhash{$key}=$currpaper;
+                    # Append the current sequence number for this citation,
+                    # since that's what Crossref recommends (sort of).
+                    # For prettiness, if the key is otherwise empty,
+                    # don't include a dash beforehand.
+		    $bibno++;
+                    $key .= ($key ? "-" : "") . $bibno;
+                    #
+                    my %paperhash;
+		    $paperhash{$key} = $currpaper;
 		    push @result, \%paperhash;
 		}
 	    }
-	    $key = $1;
-	    $currpaper="";
-	    $insidebibliography=1;
+	    # The citation key (required by schema) starts as the bibitem key.
+	    $key = $newkey;
+	    
+	    $currpaper = $_;
+	    $insidebibliography = 1;
 	    next;
 	}
 	if (/^\s*\\end\{thebibliography\}/) {
 	    if ($currpaper) {
-		    my %paperhash;
-		    $paperhash{$key}=$currpaper;
-		    push @result, \%paperhash;
+	        $bibno++;
+                $key .= ($key ? "-" : "") . $bibno;
+                #
+		my %paperhash;
+		$paperhash{$key} = $currpaper;
+		push @result, \%paperhash;
 	    }
-	    $currpaper="";
-	    $insidebibliography=0;
+	    $currpaper = "";
+	    $insidebibliography = 0;
 	    next;
 	}
 	if ($insidebibliography) {
@@ -272,6 +555,14 @@
 	}
     }
     close BIB;
+    
+    # We look in the .rpi files too, which will generally have none.
+    if (@result == 0 && $bibfile =~ /\.bbl$/) {
+        warn "$0: no \\bibitems found in: $bibfile\n";
+    } elsif ($insidebibliography) {
+        warn "$0: no \\end{thebibliography} found in: $bibfile\n";
+        warn "$0:   so the last bib entry is missing.\n";
+    }
     return @result;
 }
 
@@ -278,19 +569,16 @@
 #################################################################
 #  Printing information about one issue
 #################################################################
-
 sub PrintIssueHead {
     my ($year, $volume, $issue) = @_;
     print OUT <<END;
-      <journal_issue>
-        <publication_date media_type="print">
-          <year>$year</year>
-        </publication_date>
-        <journal_volume>
-          <volume>$volume</volume>
-        </journal_volume>
-        <issue>$issue</issue>
-      </journal_issue>
+    <journal_issue>
+      <publication_date media_type="print">
+        <year>$year</year>
+      </publication_date>
+      <journal_volume><volume>$volume</volume></journal_volume>
+      <issue>$issue</issue>
+    </journal_issue>
 END
 }
 
@@ -299,127 +587,206 @@
 ###############################################################
 sub PrintPaper {
     my $paper = shift;
-    my $title=convert($paper->{title});
-    my $url=GetURL($paper);
+    my $title = SanitizeText($paper->{title});
+    my $url = GetURL($paper);
+    my $publication_type = GetPublicationType($paper->{publicationType});
+    
+    &TitleCheck($title);
     print OUT <<END;
-      <journal_article publication_type="full_text">
-        <titles>
-           <title>
-             $title
-           </title>
-        </titles>
-        <contributors>
+    <journal_article$publication_type>
+      <titles>
+        <title>$title</title>
+      </titles>
+      <contributors>
 END
-my @authors = split /\s*\\and\s*/, $paper->{authors};
-    my $seq='first';
+    my @authors = split /\s*\\and\s*/, $paper->{authors};
+    my $seq = 'first';
     foreach my $author (@authors) {
-	print OUT <<END;
-          <person_name sequence="$seq" contributor_role="author">
-END
-$seq='additional';
-	PrintAuthor($author);
-	print OUT <<END;
-          </person_name>
-END
-
+	PrintAuthor($author, $seq);
+        $seq = 'additional';
     }
 
     print OUT <<END;
-        </contributors>
-        <publication_date media_type="print">
-           <year>$paper->{year}</year>
-        </publication_date>
-        <pages>
-           <first_page>$paper->{startpage}</first_page>
-           <last_page>$paper->{endpage}</last_page>
-        </pages>
-        <doi_data>
-          <doi>$paper->{doi}</doi>
-          <timestamp>$timestamp</timestamp>
-	  <resource>$url</resource>
-        </doi_data>
+      </contributors>
+      <publication_date media_type="print">
+        <year>$paper->{year}</year>
+      </publication_date>
+      <pages>
+        <first_page>$paper->{startpage}</first_page>
+        <last_page>$paper->{endpage}</last_page>
+      </pages>
+      <doi_data>
+        <doi>$paper->{doi}</doi>
+        <timestamp>$timestamp</timestamp>
+        <resource>$url</resource>
+      </doi_data>
 END
 
-if (scalar(@{$paper->{bibliography}})) {
+    if (scalar(@{$paper->{bibliography}})) {
     print OUT <<END;
-        <citation_list>
+      <citation_list>
 END
     foreach my $citation (@{$paper->{bibliography}}) {
 	PrintCitation($citation);
     }
     print OUT <<END;
-        </citation_list>
+      </citation_list>
 END
-}
+    }
 
     print OUT <<END;
-      </journal_article>
+    </journal_article>
 END
+}
 
 
+###############################################################
+# Crossref <title> strings can contain a few so-called "face" HTML
+# commands. Complain if they have anything else.
+# schema doc: https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#title
+#   face doc: https://www.crossref.org/education/content-registration/crossrefs-metadata-deposit-schema/face-markup/
+# mathml doc: https://www.crossref.org/education/content-registration/crossrefs-metadata-deposit-schema/including-mathml-in-deposits/
+# 
+# We don't technically validate the string, e.g., mismatched tags will
+# go unnoticed here. The real validator at Crossref will catch whatever.
+###############################################################
+sub TitleCheck {
+    my $title = shift;
+    my $orig_title = $title;
+    
+    foreach my $tag (qw(b em i ovl scp strong sub sup tt u)) {
+        $title =~ s,<\s*/?$tag\s*>,,g; # eradicate <tag> and </tag>
+    }
+
+    # <font> can (maybe?) take lots of extra attributes:
+    $title =~ s,<\s*/?font.*?>,,g;
+
+    # MathML is too complex; just wipe it all out. If there are
+    # problems, the real validator at Crossref will complain.
+    $title =~ s,<\s*mml:math.*/mml:math\s*>,,g;
+    
+    # No tags should remain.
+    if ($title =~ /</) {
+       die "$0: invalid tags remaining in: $title (original: $orig_title)\n";
+    }
 }
 
-
 ###############################################################
-#  Sanitization of a text string
+# Simplistic TeX-to-html
+# (no-op for rpi text if --rpi-is-xml was given).
 ###############################################################
 sub SanitizeText {
     my $string = shift;
-    $string = convert($string);
-    $string =~ s/\\newblock//g;
-    $string =~ s/\\bgroup//g;
-    $string =~ s/\\egroup//g;
-    $string =~ s/\\scshape//g;
-    $string =~ s/\\urlprefix//g;
-    $string =~ s/\\emph//g;
-    $string =~ s/\\textbf//g;
-    $string =~ s/\\enquote//g;
-    $string =~ s/\\url/URL: /g;
-    $string =~ s/\\doi/DOI: /g;
-    $string =~ s/\\\\/ /g;
-    $string =~ s/\$//g;
-    $string =~ s/\\checkcomma/,/g;
-    $string =~ s/~/ /g;
-    $string =~ s/[\{\}]//g;
+    return $string if $opts{xi}; # do nothing if --rpi-is-xml
+    return SanitizeTextAlways($string);
+}
+
+# Split into two functions so we can sanitize bbl but not rpi.
+sub SanitizeTextAlways {
+    my $string = shift;
+   
+    # pass user hook subroutine if defined.
+    my @hook = (defined(&{"LaTeX_ToUnicode_convert_hook"}))
+               ? ("hook" => \&LaTeX_ToUnicode_convert_hook)
+               : ();
+
+    # conversion of accented control sequences to characters, etc.
+    # Let's use &#uuuu; entities instead of literal UTF-8; Crossref
+    # recommends it, and it's easier for postprocessing.
+    $string = LaTeX::ToUnicode::convert($string, entities => 1, @hook);
+    
     return $string;
 }
 
 ################################################################
-# Printing one author
+# Printing one author in arg ORIG_AUTHOR, in sequence SEQ.
 ################################################################
 sub PrintAuthor {
-    my $author=shift;
+    my ($orig_author,$seq) = @_;
 
+    # recognize extra directives, either |organization|
+    # or |orcid=<value>|.
+    my $organization = 0;
+    my $orcid = 0;
+    my $author = "";
+    my @name_parts = split (/\|/, $orig_author);
+    foreach my $np (@name_parts) {
+        $np =~ s/^\s*(.*)\s*$/$1/s; # remove leading and trailing whitespace
+        if ($np eq "organization") {
+            $organization = 1;
+        } elsif ($np =~ /^orcid/) {
+            ($orcid = $np) =~ s/^orcid\s*=//;
+            $orcid =~ s/\s//g; # remove all whitespace from value
+            if (! $orcid) {
+                warn "$0: ignoring empty orcid specified in: $orig_author\n";
+            }
+        } elsif (! $np) {
+            # silently ignore empty part, as in ||
+        } else {
+            if ($author) {
+                die ("$0: already saw author name `$author', should not"
+                     . " have second: $np\n");
+            }
+            $author = $np;
+        }
+    }
+    
+    if ($organization && $orcid) {
+        die ("$0: orcid and organization cannot both be present in:"
+             . " $orig_author\n");
+    }
+
+    # for organizations, nothing to do but output it.
+    if ($organization) {
+        my $line = SanitizeText($author);
+        print OUT <<END;
+        <organization>$line</organization>
+END
+        return;
+    }
+    
+    # what's left is the common case of a person, not an organization.
+    print OUT <<END;
+        <person_name sequence="$seq" contributor_role="author">
+END
+
+
     my $person=new BibTeX::Parser::Author ($author);
 
     if ($person->first) {
-	my $line = $person->first;
-	$line = SanitizeText($line);
-	print OUT <<END;
-            <given_name>$line</given_name>
+        my $line = $person->first;
+        $line = SanitizeText($line);
+        print OUT <<END;
+          <given_name>$line</given_name>
 END
-
     }
 
     if ($person->last) {
-	my $line = SanitizeText($person->last);
-	if ($person->von) {
-	    $line = SanitizeText($person->von)." $line";
-	}
-	print OUT <<END;
-            <surname>$line</surname>
+        my $line = SanitizeText($person->last);
+        if ($person->von) {
+            $line = SanitizeText($person->von)." $line";
+        }
+        print OUT <<END;
+          <surname>$line</surname>
 END
-
     }
 
     if ($person->jr) {
-	my $line = SanitizeText($person->jr);
-	print OUT <<END;
-            <suffix>$line</suffix>
+        my $line = SanitizeText($person->jr);
+        print OUT <<END;
+          <suffix>$line</suffix>
 END
+    }
 
+    if ($orcid) {
+        print OUT <<END;
+          <ORCID>https://orcid.org/$orcid</ORCID>
+END
     }
 
+    print OUT <<END;
+        </person_name>
+END
 }
 
 #############################################################
@@ -427,35 +794,92 @@
 #############################################################
 sub PrintCitation {
     my $paperhash=shift;
+
     foreach my $key (keys (%{$paperhash})) {
-	my $citation=$paperhash->{$key};
-	$citation=SanitizeText($citation);
+	my $citation = $paperhash->{$key};
+	$citation = SanitizeTextAlways($citation);
 
 	print OUT <<END;
-          <citation key="$key">
-             <unstructured_citation>
-               $citation
-             </unstructured_citation>
-          </citation>
+        <citation key="$key"><unstructured_citation>
+          $citation
+        </unstructured_citation></citation>
 END
+    }
 }
 
+##############################################################
+#  Return publication_type attribute for <journal_article>, given $PUBTYPE.
+#  https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#publication_type.atts
+#  
+#  If not specified in input, return ' publication_type="full_text"' since
+#  it was hardwired that way before. If set to "omit", return empty
+#  string. Else return ' publication_type="$PUBTYPE"', if the value
+#  is valid. If not, die. (Leading space is so result can be directly used.)
+##############################################################
+sub GetPublicationType {
+    my $pubtype = shift;
+    my $ret;
+
+    if (! $pubtype) {
+        $ret = "full_text"; 
+    } elsif ($pubtype eq "omit") {
+        $ret = "";
+    } elsif ($pubtype =~ /^(abstract_only|full_text|bibliographic_record)$/) {
+        $ret = $pubtype;
+    } else {
+        die "$0: invalid publication_type: $pubtype\n";
+    }
+    
+    $ret = " publication_type=\"$ret\"" if $ret;
+    return $ret;
 }
 
 ##############################################################
-#  Calculating URL
+#  Calculating URL. Res Philosophica gets special treatment.
 ##############################################################
-
 sub GetURL {
     my $paper = shift;
 
     my $result;
     if ($paper->{paperUrl}) {
-	$result= $paper->{paperUrl}
+	$result = $paper->{paperUrl}
+
+    } elsif ($paper->{doi} =~ m,^10\.11612/resphil,) {
+	my $doi = $paper->{doi};
+	$result = 'http://www.pdcnet.org/oom/service?url_ver=Z39.88-2004&rft_val_fmt=&rft.imuse_synonym=resphilosophica&rft.DOI='.$doi.'&svc_id=info:www.pdcnet.org/collection';
+
     } else {
-	my $doi=$paper->{doi};
-	$result= 'http://www.pdcnet.org/oom/service?url_ver=Z39.88-2004&rft_val_fmt=&rft.imuse_synonym=resphilosophica&rft.DOI='.$doi.'&svc_id=info:www.pdcnet.org/collection';
+        die ("$0: paperUrl field is required\n  "
+             . &debug_hash_as_string("whole hash", $paper));
     }
-    $result =~ s/&/&/g;
+    
+    $result =~ s/&/&#x26;/g; # amp(ersand)
     return $result;
 }
+
+
+##############################################################
+#  debug_hash_as_string($LABEL, HASH)
+#
+# Return LABEL followed by HASH elements, followed by a newline, as a
+# single string. If HASH is a reference, it is followed (but no recursive
+# dereferencing).
+###############################################################
+sub debug_hash_as_string {
+  my ($label) = shift;
+  my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
+
+  my $str = "$label: {";
+  my @items = ();
+  for my $key (sort keys %hash) {
+    my $val = $hash{$key};
+    $val = ".undef" if ! defined $val;
+    $key =~ s/\n/\\n/g;
+    $val =~ s/\n/\\n/g;
+    push (@items, "$key:$val");
+  }
+  $str .= join (",", @items);
+  $str .= "}";
+
+  return "$str\n";
+}

Modified: trunk/Master/texmf-dist/tex/latex/crossrefware/ltx2crossrefxml.cfg
===================================================================
--- trunk/Master/texmf-dist/tex/latex/crossrefware/ltx2crossrefxml.cfg	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/texmf-dist/tex/latex/crossrefware/ltx2crossrefxml.cfg	2021-10-03 20:32:45 UTC (rev 60689)
@@ -1,22 +1,21 @@
-#
 # Configuration file for ltx2crossrefxml.pl
-#
 
 # Name of the depositor
-$depositorName='NAME';
+$depositorName = 'NAME';
 
-# Email
-$depositorEmail='EMAIL';
+# Email of the depositor
+$depositorEmail = 'EMAIL';
 
 # Organization
-$registrant='ORGANIZATION';
+$registrant = 'ORGANIZATION';
 
-# The rest is self-evident
-$fullTitle = "TITLE";
-$abbrevTitle = "TTL";
-$issn = "NNNN-NNNNNN";
-$coden = "CODEN";
+# Journal-specific information.
+$fullTitle = 'JOURNAL TITLE';
+$issn = 'NNNN-NNNNNN';
 
+# These two are optional. Uncomment and specify if your journal uses them.
+#$abbrevTitle = 'ABBR. TTL.';
+#$coden = 'CODEN';
 
 # The last line must be 1;
-1;
\ No newline at end of file
+1;

Modified: trunk/Master/tlpkg/libexec/ctan2tds
===================================================================
--- trunk/Master/tlpkg/libexec/ctan2tds	2021-10-03 20:31:26 UTC (rev 60688)
+++ trunk/Master/tlpkg/libexec/ctan2tds	2021-10-03 20:32:45 UTC (rev 60689)
@@ -2762,6 +2762,7 @@
  'cluttex',             'NULL',			# leave Makefile
  'cmextra',             'NULL',
  'concmath-fonts',      'NULL',
+ 'crossrefware',	'NULL',
  'crossword',           $standardsource . '|AcrossLite',
  'ctan-o-mat',          'NULL',                 # process .bat
  'cyrplain',            'NULL',                 # all in tex



More information about the tex-live-commits mailing list.