texlive[54190] Master/tlpkg: TLPSRC.pm (from_file),

commits+karl at tug.org commits+karl at tug.org
Mon Mar 9 00:14:29 CET 2020


Revision: 54190
          http://tug.org/svn/texlive?view=revision&revision=54190
Author:   karl
Date:     2020-03-09 00:14:29 +0100 (Mon, 09 Mar 2020)
Log Message:
-----------
TLPSRC.pm (from_file),
TeXCatalogue.pm (beautify): kludge around long url being forcibly broken
  by our multilineformat.
tl-update-tlpdb: new option --catalogue-dump to dump/restore catalogue
  data for development.

Modified Paths:
--------------
    trunk/Master/tlpkg/TeXLive/TLPSRC.pm
    trunk/Master/tlpkg/TeXLive/TeXCatalogue.pm
    trunk/Master/tlpkg/bin/tl-update-tlpdb

Modified: trunk/Master/tlpkg/TeXLive/TLPSRC.pm
===================================================================
--- trunk/Master/tlpkg/TeXLive/TLPSRC.pm	2020-03-08 23:11:13 UTC (rev 54189)
+++ trunk/Master/tlpkg/TeXLive/TLPSRC.pm	2020-03-08 23:14:29 UTC (rev 54190)
@@ -240,6 +240,8 @@
   $shortdesc =~ s/\s+$//g;  # rm trailing whitespace (shortdesc)
   $longdesc =~ s/\s+$//g;   # rm trailing whitespace (longdesc)
   $longdesc =~ s/\s\s+/ /g; # collapse multiple whitespace characters to one
+  # see comments in beautify.
+  $longdesc =~ s,http://grants.nih.gov/,grants.nih.gov/,g;
   #
   $self->name($name);
   $self->category($category);

Modified: trunk/Master/tlpkg/TeXLive/TeXCatalogue.pm
===================================================================
--- trunk/Master/tlpkg/TeXLive/TeXCatalogue.pm	2020-03-08 23:11:13 UTC (rev 54189)
+++ trunk/Master/tlpkg/TeXLive/TeXCatalogue.pm	2020-03-08 23:14:29 UTC (rev 54190)
@@ -1,6 +1,6 @@
 # $Id$
 # TeXLive::TeXCatalogue - module for accessing the TeX Catalogue
-# Copyright 2007-2019 Norbert Preining
+# Copyright 2007-2020 Norbert Preining
 # This file is licensed under the GNU General Public License version 2
 # or any later version.
 # 
@@ -137,14 +137,28 @@
 
 sub beautify {
   my ($txt) = @_;
+  # transliterate to ascii: it allows the final tlpdb to be pure ascii,
+  # avoiding problems since we don't control the user's terminal encoding
+  # Do first in case spaces are output by the transliteration.
+  $txt = Text::Unidecode::unidecode($txt);
+  #
   $txt =~ s/\n/ /g;  # make one line
   $txt =~ s/^\s+//g; # rm leading whitespace
   $txt =~ s/\s+$//g; # rm trailing whitespace
   $txt =~ s/\s\s+/ /g; # collapse multiple whitespace characters to one
   $txt =~ s/\t/ /g;    # tabs to spaces
-  # transliterate to ascii: it allows the final tlpdb to be pure ascii,
-  # avoiding problems since we don't control the user's terminal encoding
-  return Text::Unidecode::unidecode($txt);
+  
+  # one last bit of horribleness: there is one url in the descriptions
+  # which is longer than our multilineformat format (in TLPOBJ). The
+  # result is that it is forcibly broken. Apparently there is no way in
+  # Perl to override that. This makes it impossible to get identical
+  # longdesc results. Turns out that removing the "http://" prefix
+  # shortens it enough to fit, so do that. The better solution would be
+  # to use Text::Wrap or some other text-filling code, but going for
+  # quick and dirty here.
+  $txt =~ s,http://grants.nih.gov/,grants.nih.gov/,g;
+
+  return $txt;
 }
 
 sub name {

Modified: trunk/Master/tlpkg/bin/tl-update-tlpdb
===================================================================
--- trunk/Master/tlpkg/bin/tl-update-tlpdb	2020-03-08 23:11:13 UTC (rev 54189)
+++ trunk/Master/tlpkg/bin/tl-update-tlpdb	2020-03-08 23:14:29 UTC (rev 54190)
@@ -29,6 +29,7 @@
 
 my $opt_catalogue = "";
 my $opt_catalogue_compare = "";
+my $opt_catalogue_dump = "";
 my $opt_dry_run = 0;
 my $opt_fix_reverse_revisions = 0;  # needs svn commit
 my $opt_fromfiles = 0;
@@ -49,6 +50,7 @@
 GetOptions(
     "catalogue=s"                => \$opt_catalogue,
     "catalogue-compare=s"        => \$opt_catalogue_compare,
+    "catalogue-dump=s"           => \$opt_catalogue_dump,
     "dry-run|n",                 => \$opt_dry_run,
     "fix-reverse-revisions!"     => \$opt_fix_reverse_revisions,
     "from-files"                 => \$opt_fromfiles,
@@ -96,22 +98,28 @@
   
   our $tlc = undef;
   if (-r $opt_catalogue) {
-    info("$prg: reading TeX Catalogue $opt_catalogue ...\n");
-    $tlc = TeXLive::TeXCatalogue->new("location" => $opt_catalogue);
-    # way to dump the catalogue and then use it instead of reparsing.
-    # Maybe someday have the energy to make it a real option,
-    # and merge this with the same code in TLUtils as a common function.
-    #require "/home/karl/cat.pl";
-    #if (0) {
-    #  require Data::Dumper;
-    #  $Data::Dumper::Indent = 1;
-    #  $Data::Dumper::Sortkeys = 1;  # stable output
-    #  $Data::Dumper::Purity = 1; # recursive structures must be safe
-    #  print STDERR "DDdumping tlc(atalogue)\n";
-    #  open (my $fh, ">/home/karl/cat.pl") || die "open(cat) failed: $!\n";
-    #  print $fh Data::Dumper->Dump([$tlc], [qw(::tlc)]);
-    #  close ($fh) || die "close(cat) failed: $!\n";
-    #}
+    if (-s $opt_catalogue_dump) { # if dump file exists, use it.
+      info("$prg: reading TeX Catalogue dump $opt_catalogue_dump ...\n");
+      require $opt_catalogue_dump;
+    } else {
+      info("$prg: reading TeX Catalogue $opt_catalogue ...\n");
+      $tlc = TeXLive::TeXCatalogue->new("location" => $opt_catalogue);
+      if ($opt_catalogue_dump) {
+        # Way to dump the catalogue and then use it instead of reparsing,
+        # to save time when developing/debugging.
+        # Maybe someday have the energy to merge this with the same dumping
+        # code in TLUtils as a common function.
+        require Data::Dumper;
+        $Data::Dumper::Indent = 1;
+        $Data::Dumper::Sortkeys = 1;  # stable output
+        $Data::Dumper::Purity = 1; # recursive structures must be safe
+        info("$prg: dumping TeX Catalogue to $opt_catalogue_dump ...\n");
+        my $catdump = ">$opt_catalogue_dump";
+        open (my $fh, $catdump) || die "open($catdump) failed: $!\n";
+        print $fh Data::Dumper->Dump([$tlc], [qw(::tlc)]);
+        close ($fh) || die "close($catdump) failed: $!\n";
+      }
+    }
   } else {
     tlwarn("$prg: reusing Catalogue data, since source is not readable: "
            . "$opt_catalogue\n");
@@ -272,7 +280,7 @@
           # change, we have either rA < rB or rA > rB
           if ($rA > $rB) {
             tlwarn("$prg: BIG WARNING for $p: file lists didn't change "
-                   . "but revision is going backward! Very strange!\n");
+                   . "but revision is going backward ($rA > $rB)!\n");
             $packages_needing_fixup{$p} = 1;
           } # if strange revision change
         }   # no revision changes
@@ -570,14 +578,14 @@
   if (! &equal_strings($tlpA->shortdesc, $tlpB->shortdesc)) {
     # Maybe someday combine this silly code to avoid redundancy,
     # and show strings (hashes) for all.
-    debug("$pkg:  shortdesc changed: ", $tlpA->shortdesc, "\n",
+    debug("$pkg:  shortdesc now: ", $tlpA->shortdesc, "\n",
           " " x length($pkg),
-               "                 vs. ", $tlpB->shortdesc, "\n");
+            "    vs. compare db:", $tlpB->shortdesc, "\n");
     
   } elsif (! &equal_strings($tlpA->longdesc, $tlpB->longdesc)) {
-    debug("$pkg:  longdesc changed: ", $tlpA->longdesc, "\n",
+    debug("$pkg:  longdesc now: ", $tlpA->longdesc, "\n",
           " " x length($pkg),
-               "                vs. ", $tlpB->longdesc, "\n");
+            "   vs. compare db: ", $tlpB->longdesc, "\n");
 
   } elsif (! &equal_strings($tlpA->catalogue, $tlpB->catalogue)) {
     debug("$pkg: catalogue value changed\n");
@@ -732,6 +740,23 @@
 This implies C<--reverse-revision-check>, since the same mechanism is
 used to force the new version numbers.
 
+=item B<--catalogue-dump> I<file>
+
+This is for debugging and development. If specified, and I<file> is
+nonempty, it is C<require>d, instead of reading the XML files from
+I<Catalogue_dir> from C<--catalogue>. If I<file> is empty or does not
+exist, the XML tree is read, and then dumped (with L<Data::Dumper>) to
+I<file>.
+
+The idea is to specify this once so that the dump gets written; subsequent
+runs will then use it, which is much faster than reading the XML. Don't
+forget to delete the file when done, though, since there is no check for
+staleness.
+
+Also, don't use this if you are actually working on the Catalogue
+reading routines (L<TeXLive::TeXCatalogue>), since they won't get
+invoked at all if the dump is read.
+
 =item B<--fix-reverse-revisions>
 
 If a package with revision number going backward is found, this option



More information about the tex-live-commits mailing list.