texlive[71186] trunk: bibcop

commits+karl at tug.org commits+karl at tug.org
Mon May 6 18:47:17 CEST 2024


Revision: 71186
          https://tug.org/svn/texlive?view=revision&revision=71186
Author:   karl
Date:     2024-05-06 18:47:16 +0200 (Mon, 06 May 2024)
Log Message:
-----------
bibcop

Modified Paths:
--------------
    trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl
    trunk/Master/texmf-dist/doc/bibtex/bibcop/README.md
    trunk/Master/texmf-dist/doc/bibtex/bibcop/bibcop.pdf
    trunk/Master/texmf-dist/doc/man/man1/bibcop.1
    trunk/Master/texmf-dist/doc/man/man1/bibcop.man1.pdf
    trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl
    trunk/Master/texmf-dist/source/bibtex/bibcop/bibcop.dtx
    trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty

Modified: trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl	2024-05-05 23:41:43 UTC (rev 71185)
+++ trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl	2024-05-06 16:47:16 UTC (rev 71186)
@@ -21,13 +21,13 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-# 2024-03-15 0.0.20
+# 2024-05-03 0.0.21
 package bibcop;
 
 use warnings;
 use strict;
+use POSIX;
 use File::Basename;
-use Time::Piece;
 
 # Hash of incoming command line arguments.
 my %args = map { $_ => 1 } @ARGV;
@@ -40,11 +40,12 @@
   'article' => ['doi', 'year', 'title', 'author', 'journal', 'volume', 'number', 'month?', 'publisher?', 'pages?'],
   'inproceedings' => ['doi', 'booktitle', 'title', 'author', 'year', 'pages?', 'month?', 'organization?', 'volume?'],
   'book' => ['title', 'author', 'year', 'publisher', 'doi?'],
-  'misc' => ['title', 'author', 'year', 'eprint?', 'archiveprefix?', 'primaryclass?', 'month?', 'publisher?', 'organization?', 'doi?', 'howpublished?', 'note?'],
+  'phdthesis' => ['title', 'author', 'year', 'school', 'doi?'],
+  'misc' => ['title', 'author', 'year', 'eprint?', 'archiveprefix?', 'primaryclass?', 'month?', 'publisher?', 'organization?', 'doi?', 'howpublished?', 'note?', 'pages?', 'number?', 'volume?'],
 );
 
 # See https://research.arizona.edu/faq/what-do-you-mean-when-you-say-use-title-case-proposalproject-titles
-my %minors = map { $_ => 1 } qw/in of at to by the a an and or as if up via yet nor but off on for into/;
+my %minors = map { $_ => 1 } qw/in of at to by the a an and or as if up via yet nor but off on for into vs/;
 
 # Check the presence of mandatory tags.
 sub check_mandatory_tags {
@@ -53,6 +54,9 @@
   }
   my (%entry) = @_;
   my $type = $entry{':type'};
+  if (not exists $blessed{$type}) {
+    return "The type of entry is not allowed: '$type'"
+  }
   my $mandatory = $blessed{$type};
   foreach my $tag (@$mandatory) {
     if ($tag =~ /^.*\?$/) {
@@ -90,29 +94,35 @@
     if (not exists $tags{$tag}) {
       next;
     }
-    my $tailed = qr/^.+(:|\?)$/;
+    my @ends = qw/ ; ? . --- : ! /;
     my $value = $entry{$tag};
     my @words = only_words($value);
     my $pos = 0;
     foreach my $word (@words) {
+      $word =~ s/\.$//g;
       $pos = $pos + 1;
       if (not $word =~ /^[A-Za-z]/) {
         next;
       }
+      if ($word =~ /^\{.*|.*\}$/) {
+        next;
+      }
       if (exists $minors{$word}) {
         if ($pos eq 1) {
-          return "The minor word in the '$tag' must be upper-cased since it is the first one"
+          return "The minor word '$word' in the '$tag' must be upper-cased since it is the first one"
         }
-        if (not $words[$pos - 2] =~ $tailed) {
-          next;
+        my $before = $words[$pos - 2];
+        if (grep(/^$before$/, @ends)) {
+          return "The minor word '$word' in the '$tag' must be upper-cased, because it follows the '$before'"
         }
-        return "The minor word in the '$tag' must be upper-cased, because it follows the colon"
+        next;
       }
       if (exists $minors{lc($word)}) {
         if ($pos eq 1) {
           next;
         }
-        if ($words[$pos - 2] =~ $tailed) {
+        my $before = $words[$pos - 2];
+        if (grep(/^$before$/, @ends)) {
           next;
         }
         return "All minor words in the '$tag' must be lower-cased, while @{[as_position($pos)]} word '$word' is not"
@@ -156,6 +166,9 @@
       if ($name =~ /^[A-Z][^.]+$/) {
         next
       }
+      if ($name =~ /^(van|de|der|dos)$/) {
+        next
+      }
       if ($name =~ /^[A-Z]$/) {
         return "A shortened name must have a tailing dot in @{[as_position($pos)]} 'author', as in 'Knuth, Donald E.'";
       }
@@ -173,11 +186,17 @@
       next;
     }
     my $value = $entry{$tag};
-    my @words = only_words($value);
+    my @words = split(/ /, clean_tex($value));
     foreach my $word (@words) {
       if (not $word =~ /^[A-Za-z]/) {
         next;
       }
+      if ($word eq 'vs.') {
+        next;
+      }
+      if ($word =~ /\.\.\.$/) {
+        next;
+      }
       if ($word =~ /^.*\.$/) {
         return "Do not shorten the words in the '$tag', such as '$word'"
       }
@@ -275,6 +294,7 @@
   my @no_space_after = ( '(', '[' );
   my @space_before = ( '(', '[' );
   my @space_after = ( ')', ']' );
+  my @good_tails = ( 'Inc.', 'Ltd.' );
   my @bad_tails = ( '.', ',', ';', ':', '-' );
   foreach my $tag (keys %entry) {
     if ($tag =~ /^:.*/) {
@@ -288,9 +308,17 @@
       if ($s eq '.' and $tag eq 'author') {
         next;
       }
-      if ($value =~ /^.*\Q$s\E$/) {
-        return "The '$tag' must not end with a $symbols{$s}"
+      my $good = 0;
+      foreach my $s (@good_tails) {
+        if ($value =~ /^.*\Q$s\E$/) {
+          $good = 1;
+        }
       }
+      if (not $good) {
+        if ($value =~ /^.*\Q$s\E$/) {
+          return "The '$tag' must not end with a $symbols{$s}"
+        }
+      }
     }
     foreach my $s (@no_space_before) {
       if ($value =~ /^.*\s\Q$s\E.*$/) {
@@ -309,7 +337,7 @@
     }
     foreach my $s (@space_after) {
       my $p = join('', @no_space_before);
-      if ($value =~ /^.*\Q$s\E[^\}\s\Q$p\E].*$/) {
+      if ($value =~ /^.*[^\\]\Q$s\E[^\}\s\Q$p\E].*$/) {
         return "In the '$tag', put a space after the $symbols{$s}"
       }
     }
@@ -401,7 +429,7 @@
   my (%entry) = @_;
   if (exists $entry{'doi'}) {
     my $doi = $entry{'doi'};
-    if (not $doi =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-)(><:;]+$/) {
+    if (not $doi =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-)(><:;\/]+$/) {
       return "The format of the 'doi' is wrong"
     }
   }
@@ -543,7 +571,7 @@
     }
     my $value = clean_tex($entry{$tag});
     if ($tag eq 'url') {
-      my $today = localtime->strftime('%d-%m-%Y');
+      my $today = strftime('%d-%m-%Y', localtime(time));
       push(@lines, "  howpublished = {\\url{$value}},");
       push(@lines, "  note = {[Online; accessed $today]},");
       next;
@@ -557,6 +585,7 @@
       no strict 'refs';
       $value = $fixer->($value);
     }
+    $value = fix_unicode($value);
     if ($tag =~ /title|booktitle|journal/) {
       $value = '{' . $value . '}';
     }
@@ -579,17 +608,18 @@
   my @authors = split(/\s+and\s+/, $value);
   foreach my $author (@authors) {
     $author =~ s/^\s+|\s+$//g;
-    if (index($author, '{') != -1 or index($author, '}') != -1) {
+    if (index($author, ' {') != -1 or index($author, '} ') != -1) {
       next;
     }
     $author =~ s/ ([A-Z])($| )/ $1.$2/g;
-    if (index($author, ',') eq -1) {
+    if (index($author, ',') == -1) {
       my @words = split(/\s+/, $author);
       my $total = @words+0;
-      if ($total gt 1) {
+      if ($total > 1) {
         $author = $words[$total - 1] . ', ' . join(' ', @words[0 .. $total - 2]);
       }
     }
+    $author =~ s/([A-Z])\.(?![ ,])/$1. /g;
     $author =~ s/^\s+|\s+$//g;
   }
   return join(' and ', @authors);
@@ -601,6 +631,35 @@
   return $value;
 }
 
+sub fix_month {
+  my ($value) = @_;
+  my %months = (
+    '1' => 'jan',
+    '2' => 'feb',
+    '3' => 'mar',
+    '4' => 'apr',
+    '5' => 'may',
+    '6' => 'jun',
+    '7' => 'jul',
+    '8' => 'aug',
+    '9' => 'sep',
+    '10' => 'oct',
+    '11' => 'nov',
+    '12' => 'dec',
+  );
+  $value =~ s/^0+//g;
+  if ($value =~ /^11|12|[0-9]$/) {
+    $value = $months{$value};
+  } else {
+    my %rev = reverse %months;
+    my $lc = substr(lc($value), 0, 3);
+    if (exists $rev{$lc}) {
+      $value = $lc;
+    }
+  }
+  return $value;
+}
+
 sub fix_capitalization {
   my ($value) = @_;
   my @words = split(/\s+/, $value);
@@ -610,17 +669,37 @@
     if (not $word =~ /^[A-Za-z]/) {
       next;
     }
-    my $lc = lc($word);
-    if (exists $minors{$lc} and $pos gt 1 and not $words[$pos - 2] =~ /:$/) {
-      $word = $lc;
-      next;
+    my $start = 1;
+    if ($pos > 1) {
+      my $before = $words[$pos - 2];
+      if (not $before =~ /(:|\?|!|;|-)$/) {
+        $start = 0;
+      }
     }
-    if ($word =~ /^[a-z].*/) {
-      $word =~ s/^([a-z])/\U$1/g;
+    my @parts = split(/-/, $word, -1);
+    my $p = 0;
+    foreach my $part (@parts) {
+      $p += 1;
+      my $lcp = lc($part);
+      my $head = $lcp;
+      $head =~ s/[,\.!\?;:]$//g;
+      if (exists $minors{$head}) {
+        if ($p > 1) {
+          my $pre = $parts[$p - 2];
+          if (not $pre eq '') {
+            $part = $lcp;
+            next;
+          }
+        } elsif (@parts+0 == 1) {
+          if (not $start) {
+            $part = $lcp;
+            next;
+          }
+        }
+      }
+      $part =~ s/^([a-z])/\U$1/g;
     }
-    if (index($word, '-') != -1) {
-      $word =~ s/-([a-z])/-\U$1/g;
-    }
+    $word = join('-', @parts);
   }
   return join(' ', @words);
 }
@@ -628,6 +707,8 @@
 sub fix_title {
   my ($value) = @_;
   $value = fix_capitalization($value);
+  $value =~ s/([^ ])---/$1 ---/g;
+  $value =~ s/---([^ ])/--- $1/g;
   return $value;
 }
 
@@ -653,7 +734,9 @@
   if ($left !~ /^[0-9]*$/ or $right !~ /^[0-9]*$/) {
     return $value;
   }
-  if ($left + 0 gt $right + 0) {
+  $left = $left + 0;
+  $right = $right + 0;
+  if ($left > $right) {
     my $tmp = $left;
     $left = $right;
     $right = $tmp;
@@ -675,6 +758,22 @@
   foreach my $org (@orgs) {
     $value =~ s/ \Q$org\E / /g;
   }
+  my %numbers = (
+    'First' => '1st',
+    'Second' => '2nd',
+    'Third' => '3rd',
+    'Fourth' => '4th',
+    'Fifth' => '5th',
+    'Sixth' => '6th',
+    'Seventh' => '7th',
+    'Eighth' => '8th',
+    'Nineth' => '9th',
+    'Tenth' => '10th'
+  );
+  keys %numbers;
+  while(my($left, $right) = each %numbers) {
+    $value =~ s/^Proceedings of the \Q$left\E /Proceedings of the $right /g;
+  }
   return $value;
 }
 
@@ -707,6 +806,28 @@
   return $value;
 }
 
+sub fix_unicode {
+  my ($value) = @_;
+  my %literals = (
+    'ò' => '\`{o}', 'ó' => '\\\'{o}', 'ô' => '\^{o}', 'ö' => '\"{o}', 'ő' => '\H{o}', 'ǒ' => '\v{o}', 'õ' => '\~{o}',
+    'à' => '\`{a}', 'á' => '\\\'{a}', 'â' => '\^{a}', 'ä' => '\"{a}', 'å' => '\r{a}', 'ą' => '\k{a}', 'ǎ' => '\v{a}', 'ã' => '\~{a}',
+    'ù' => '\`{u}', 'ú' => '\\\'{u}', 'û' => '\^{u}', 'ü' => '\"{u}', 'ů' => '\r{u}', 'ǔ' => '\v{u}', 'ũ' => '\~{u}',
+    'ì' => '\`{i}', 'í' => '\\\'{i}', 'î' => '\^{i}', 'ï' => '\"{i}', 'ǐ' => '\v{i}', 'ĩ' => '\~{i}',
+    'ń' => '\\\'{n}', 'ň' => '\v{n}', 'ñ' => '\~{n}',
+    'ç' => '\c{c}',
+    'ł' => '\l{}',
+    'ı' => '{\i}',
+    'ø' => '\o{}',
+    '–' => '--', '—' => '---',
+    '’' => '\''
+  );
+  keys %literals;
+  while(my($k, $v) = each %literals) {
+    $value =~ s/\Q$k\E/$v/g;
+  }
+  return $value;
+}
+
 # Parse the incoming .bib file and return an array
 # of hash-maps, where each one is a bibentry.
 sub entries {
@@ -832,7 +953,12 @@
 # Takes the text and returns only list of words seen there.
 sub only_words {
   my ($tex) = @_;
-  return split(/[ \-]/, clean_tex($tex));
+  my $t = clean_tex($tex);
+  $t =~ s/([^a-zA-Z0-9\\'])/ $1 /g;
+  $t =~ s/- +- +-/---/g;
+  $t =~ s/{ /{/g;
+  $t =~ s/ }/}/g;
+  return split(/ +/, $t);
 }
 
 # Take a TeX string and return a cleaner one, without redundant spaces, brackets, etc.
@@ -958,7 +1084,7 @@
     "      --latex     Report errors in LaTeX format using \\PackageWarningNoLine command\n\n" .
     "If any issues, report to GitHub: https://github.com/yegor256/bibcop");
 } elsif (exists $args{'--version'} or exists $args{'-v'}) {
-  info('0.0.20 2024-03-15');
+  info('0.0.21 2024-05-03');
 } else {
   my ($file) = grep { not($_ =~ /^-.*$/) } @ARGV;
   if (not $file) {
@@ -997,7 +1123,7 @@
         $found += 1;
       }
     }
-    if ($found gt 0) {
+    if ($found > 0) {
       debug("$found problem(s) found");
       fail();
     }

Modified: trunk/Master/texmf-dist/doc/bibtex/bibcop/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/bibtex/bibcop/README.md	2024-05-05 23:41:43 UTC (rev 71185)
+++ trunk/Master/texmf-dist/doc/bibtex/bibcop/README.md	2024-05-06 16:47:16 UTC (rev 71186)
@@ -1,5 +1,7 @@
-<img src="https://raw.githubusercontent.com/yegor256/bibcop/master/bibcop-logo.svg" height="92px"/>
+# Bibcop: Style Checker of BibTeX .bib files
 
+![bibcop logo](bibcop-logo.png)
+
 [![l3build](https://github.com/yegor256/bibcop/actions/workflows/l3build.yml/badge.svg)](https://github.com/yegor256/bibcop/actions/workflows/l3build.yml)
 [![CTAN](https://img.shields.io/ctan/v/bibcop)](https://ctan.org/pkg/bibcop)
 [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/yegor256/bibcop/blob/master/LICENSE.txt)
@@ -10,12 +12,12 @@
 [biblatex-check](https://github.com/pezmc/biblatex-check) tools —
 they do _almost_ the same but from the command line.
 
-Read [this blog post](https://www.yegor256.com/2023/09/05/style-checker-for-bibtex-files.html), 
-in order to understand the motivation behind this package.
+Read [this blog post][BLOG],in order to understand
+the motivation behind this package.
 
-First, [install it](https://en.wikibooks.org/wiki/LaTeX/Installing_Extra_Packages)
-from [CTAN](https://ctan.org/pkg/bibcop)
-and then use in the preamble (if you use [BibTeX](http://www.bibtex.org/), for example):
+First, [install it][INSTALL] from [CTAN](https://ctan.org/pkg/bibcop)
+and then use in the preamble
+(if you use [BibTeX](http://www.bibtex.org/), for example):
 
 ```tex
 \documentclass{article}
@@ -26,45 +28,66 @@
 \end{document}
 ```
 
-Otherwise, you can download [`bibcop.sty`](https://raw.githubusercontent.com/yegor256/bibcop/gh-pages/bibcop.sty) and add to your project (together with [`bibcop.pl`](https://raw.githubusercontent.com/yegor256/bibcop/gh-pages/bibcop.pl)!).
+You can also add it as a GitHub Action to your
+GitHub repository, with the help of
+[bibcop-action](https://github.com/yegor256/bibcop-action).
 
-You can also download [`bibcop.pl`](https://raw.githubusercontent.com/yegor256/bibcop/gh-pages/bibcop.pl)
+Otherwise, you can download
+[`bibcop.sty`](https://yegor256.github.io/bibcop/bibcop.sty)
+and add to your project (together with
+[`bibcop.pl`](https://yegor256.github.io/bibcop/bibcop.pl)!).
+
+You can also download
+[`bibcop.pl`](https://yegor256.github.io/bibcop/bibcop.pl)
 and use it as a command line tool
 to check your `.bib` files and to auto-fix them
 (you should have [Perl](https://www.perl.org) installed):
 
+```bash
+perl bibcop.pl --fix main.bib > fixed.bib
 ```
-$ perl bibcop.pl --fix main.bib > fixed.bib
-```
 
-This command will read the `main.bib` file and create `fixed.bib`, which
-will have the fixed and properly formatted content (well, to some extent).
+This command will read the `main.bib` file and
+create `fixed.bib`, which will have the fixed and properly
+formatted content (well, to some extent).
 Be careful, all comments will be removed.
 
-If you install the package using [`tlmgr`](https://www.tug.org/texlive/tlmgr.html),
-you should be able to use `bibcop` directly, without the necessity to mention Perl:
+You can also make changes inline, not creating a new file:
 
+```bash
+perl bibcop.pl --fix --in-place main.bib
 ```
-$ tlgmr install bibcop
-$ bibcop --help
+
+If you install the package using
+[`tlmgr`](https://www.tug.org/texlive/tlmgr.html),
+you should be able to use `bibcop` directly, without the
+necessity to mention Perl:
+
+```bash
+tlgmr install bibcop
+bibcop --help
 ```
 
 ## How to Contribute
 
-If you want to contribute yourself, make a fork, then create a branch, 
-then run `l3build ctan` in the root directory.
-It should compile everything without errors. If not, submit an issue and wait.
-Otherwise, make your changes and then run `l3build ctan` again. If the build is
-still clean, submit a pull request.
+If you want to contribute yourself, make a fork, then create a branch,
+then run `l3build ctan` in the root directory. It should compile
+everything without errors. If not, submit an issue and wait.
+Otherwise, make your changes and then run `l3build ctan` again.
+If the build is still clean, submit a pull request.
 
-If you want to add a new check, add it as a Perl subroutine to the `bibcop.pl` file.
-Don't forget to add a test to one of the test files that stay in the `perl-tests/` directory.
+If you want to add a new check, add it as a Perl subroutine
+to the `bibcop.pl` file. Don't forget to add a test to one of the test
+files that stay in the `perl-tests/` directory.
 When ready, run this, in order to check that all tests pass:
 
 ```bash
-$ perl tests.pl
+perl tests.pl
 ```
 
 You should see the `GREAT!` message.
 
-Copyright (c) 2022-2024 Yegor Bugayenko, MIT License
\ No newline at end of file
+Copyright (c) 2022-2024 Yegor Bugayenko, MIT License
+
+[BLOG]: https://www.yegor256.com/2023/09/05/style-checker-for-bibtex-files.html
+[INSTALL]: https://en.wikibooks.org/wiki/LaTeX/Installing_Extra_Packages

Modified: trunk/Master/texmf-dist/doc/bibtex/bibcop/bibcop.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/man/man1/bibcop.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/bibcop.1	2024-05-05 23:41:43 UTC (rev 71185)
+++ trunk/Master/texmf-dist/doc/man/man1/bibcop.1	2024-05-06 16:47:16 UTC (rev 71186)
@@ -1,4 +1,4 @@
-.TH bibcop 1 "2024-03-15"
+.TH bibcop 1 "2024-05-03"
 .SH NAME
 bibcop \- Style Checker and Fixer of BibTeX Files (.bib)
 .SH SYNOPSIS

Modified: trunk/Master/texmf-dist/doc/man/man1/bibcop.man1.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl	2024-05-05 23:41:43 UTC (rev 71185)
+++ trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl	2024-05-06 16:47:16 UTC (rev 71186)
@@ -21,13 +21,13 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-# 2024-03-15 0.0.20
+# 2024-05-03 0.0.21
 package bibcop;
 
 use warnings;
 use strict;
+use POSIX;
 use File::Basename;
-use Time::Piece;
 
 # Hash of incoming command line arguments.
 my %args = map { $_ => 1 } @ARGV;
@@ -40,11 +40,12 @@
   'article' => ['doi', 'year', 'title', 'author', 'journal', 'volume', 'number', 'month?', 'publisher?', 'pages?'],
   'inproceedings' => ['doi', 'booktitle', 'title', 'author', 'year', 'pages?', 'month?', 'organization?', 'volume?'],
   'book' => ['title', 'author', 'year', 'publisher', 'doi?'],
-  'misc' => ['title', 'author', 'year', 'eprint?', 'archiveprefix?', 'primaryclass?', 'month?', 'publisher?', 'organization?', 'doi?', 'howpublished?', 'note?'],
+  'phdthesis' => ['title', 'author', 'year', 'school', 'doi?'],
+  'misc' => ['title', 'author', 'year', 'eprint?', 'archiveprefix?', 'primaryclass?', 'month?', 'publisher?', 'organization?', 'doi?', 'howpublished?', 'note?', 'pages?', 'number?', 'volume?'],
 );
 
 # See https://research.arizona.edu/faq/what-do-you-mean-when-you-say-use-title-case-proposalproject-titles
-my %minors = map { $_ => 1 } qw/in of at to by the a an and or as if up via yet nor but off on for into/;
+my %minors = map { $_ => 1 } qw/in of at to by the a an and or as if up via yet nor but off on for into vs/;
 
 # Check the presence of mandatory tags.
 sub check_mandatory_tags {
@@ -53,6 +54,9 @@
   }
   my (%entry) = @_;
   my $type = $entry{':type'};
+  if (not exists $blessed{$type}) {
+    return "The type of entry is not allowed: '$type'"
+  }
   my $mandatory = $blessed{$type};
   foreach my $tag (@$mandatory) {
     if ($tag =~ /^.*\?$/) {
@@ -90,29 +94,35 @@
     if (not exists $tags{$tag}) {
       next;
     }
-    my $tailed = qr/^.+(:|\?)$/;
+    my @ends = qw/ ; ? . --- : ! /;
     my $value = $entry{$tag};
     my @words = only_words($value);
     my $pos = 0;
     foreach my $word (@words) {
+      $word =~ s/\.$//g;
       $pos = $pos + 1;
       if (not $word =~ /^[A-Za-z]/) {
         next;
       }
+      if ($word =~ /^\{.*|.*\}$/) {
+        next;
+      }
       if (exists $minors{$word}) {
         if ($pos eq 1) {
-          return "The minor word in the '$tag' must be upper-cased since it is the first one"
+          return "The minor word '$word' in the '$tag' must be upper-cased since it is the first one"
         }
-        if (not $words[$pos - 2] =~ $tailed) {
-          next;
+        my $before = $words[$pos - 2];
+        if (grep(/^$before$/, @ends)) {
+          return "The minor word '$word' in the '$tag' must be upper-cased, because it follows the '$before'"
         }
-        return "The minor word in the '$tag' must be upper-cased, because it follows the colon"
+        next;
       }
       if (exists $minors{lc($word)}) {
         if ($pos eq 1) {
           next;
         }
-        if ($words[$pos - 2] =~ $tailed) {
+        my $before = $words[$pos - 2];
+        if (grep(/^$before$/, @ends)) {
           next;
         }
         return "All minor words in the '$tag' must be lower-cased, while @{[as_position($pos)]} word '$word' is not"
@@ -156,6 +166,9 @@
       if ($name =~ /^[A-Z][^.]+$/) {
         next
       }
+      if ($name =~ /^(van|de|der|dos)$/) {
+        next
+      }
       if ($name =~ /^[A-Z]$/) {
         return "A shortened name must have a tailing dot in @{[as_position($pos)]} 'author', as in 'Knuth, Donald E.'";
       }
@@ -173,11 +186,17 @@
       next;
     }
     my $value = $entry{$tag};
-    my @words = only_words($value);
+    my @words = split(/ /, clean_tex($value));
     foreach my $word (@words) {
       if (not $word =~ /^[A-Za-z]/) {
         next;
       }
+      if ($word eq 'vs.') {
+        next;
+      }
+      if ($word =~ /\.\.\.$/) {
+        next;
+      }
       if ($word =~ /^.*\.$/) {
         return "Do not shorten the words in the '$tag', such as '$word'"
       }
@@ -275,6 +294,7 @@
   my @no_space_after = ( '(', '[' );
   my @space_before = ( '(', '[' );
   my @space_after = ( ')', ']' );
+  my @good_tails = ( 'Inc.', 'Ltd.' );
   my @bad_tails = ( '.', ',', ';', ':', '-' );
   foreach my $tag (keys %entry) {
     if ($tag =~ /^:.*/) {
@@ -288,9 +308,17 @@
       if ($s eq '.' and $tag eq 'author') {
         next;
       }
-      if ($value =~ /^.*\Q$s\E$/) {
-        return "The '$tag' must not end with a $symbols{$s}"
+      my $good = 0;
+      foreach my $s (@good_tails) {
+        if ($value =~ /^.*\Q$s\E$/) {
+          $good = 1;
+        }
       }
+      if (not $good) {
+        if ($value =~ /^.*\Q$s\E$/) {
+          return "The '$tag' must not end with a $symbols{$s}"
+        }
+      }
     }
     foreach my $s (@no_space_before) {
       if ($value =~ /^.*\s\Q$s\E.*$/) {
@@ -309,7 +337,7 @@
     }
     foreach my $s (@space_after) {
       my $p = join('', @no_space_before);
-      if ($value =~ /^.*\Q$s\E[^\}\s\Q$p\E].*$/) {
+      if ($value =~ /^.*[^\\]\Q$s\E[^\}\s\Q$p\E].*$/) {
         return "In the '$tag', put a space after the $symbols{$s}"
       }
     }
@@ -401,7 +429,7 @@
   my (%entry) = @_;
   if (exists $entry{'doi'}) {
     my $doi = $entry{'doi'};
-    if (not $doi =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-)(><:;]+$/) {
+    if (not $doi =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-)(><:;\/]+$/) {
       return "The format of the 'doi' is wrong"
     }
   }
@@ -543,7 +571,7 @@
     }
     my $value = clean_tex($entry{$tag});
     if ($tag eq 'url') {
-      my $today = localtime->strftime('%d-%m-%Y');
+      my $today = strftime('%d-%m-%Y', localtime(time));
       push(@lines, "  howpublished = {\\url{$value}},");
       push(@lines, "  note = {[Online; accessed $today]},");
       next;
@@ -557,6 +585,7 @@
       no strict 'refs';
       $value = $fixer->($value);
     }
+    $value = fix_unicode($value);
     if ($tag =~ /title|booktitle|journal/) {
       $value = '{' . $value . '}';
     }
@@ -579,17 +608,18 @@
   my @authors = split(/\s+and\s+/, $value);
   foreach my $author (@authors) {
     $author =~ s/^\s+|\s+$//g;
-    if (index($author, '{') != -1 or index($author, '}') != -1) {
+    if (index($author, ' {') != -1 or index($author, '} ') != -1) {
       next;
     }
     $author =~ s/ ([A-Z])($| )/ $1.$2/g;
-    if (index($author, ',') eq -1) {
+    if (index($author, ',') == -1) {
       my @words = split(/\s+/, $author);
       my $total = @words+0;
-      if ($total gt 1) {
+      if ($total > 1) {
         $author = $words[$total - 1] . ', ' . join(' ', @words[0 .. $total - 2]);
       }
     }
+    $author =~ s/([A-Z])\.(?![ ,])/$1. /g;
     $author =~ s/^\s+|\s+$//g;
   }
   return join(' and ', @authors);
@@ -601,6 +631,35 @@
   return $value;
 }
 
+sub fix_month {
+  my ($value) = @_;
+  my %months = (
+    '1' => 'jan',
+    '2' => 'feb',
+    '3' => 'mar',
+    '4' => 'apr',
+    '5' => 'may',
+    '6' => 'jun',
+    '7' => 'jul',
+    '8' => 'aug',
+    '9' => 'sep',
+    '10' => 'oct',
+    '11' => 'nov',
+    '12' => 'dec',
+  );
+  $value =~ s/^0+//g;
+  if ($value =~ /^11|12|[0-9]$/) {
+    $value = $months{$value};
+  } else {
+    my %rev = reverse %months;
+    my $lc = substr(lc($value), 0, 3);
+    if (exists $rev{$lc}) {
+      $value = $lc;
+    }
+  }
+  return $value;
+}
+
 sub fix_capitalization {
   my ($value) = @_;
   my @words = split(/\s+/, $value);
@@ -610,17 +669,37 @@
     if (not $word =~ /^[A-Za-z]/) {
       next;
     }
-    my $lc = lc($word);
-    if (exists $minors{$lc} and $pos gt 1 and not $words[$pos - 2] =~ /:$/) {
-      $word = $lc;
-      next;
+    my $start = 1;
+    if ($pos > 1) {
+      my $before = $words[$pos - 2];
+      if (not $before =~ /(:|\?|!|;|-)$/) {
+        $start = 0;
+      }
     }
-    if ($word =~ /^[a-z].*/) {
-      $word =~ s/^([a-z])/\U$1/g;
+    my @parts = split(/-/, $word, -1);
+    my $p = 0;
+    foreach my $part (@parts) {
+      $p += 1;
+      my $lcp = lc($part);
+      my $head = $lcp;
+      $head =~ s/[,\.!\?;:]$//g;
+      if (exists $minors{$head}) {
+        if ($p > 1) {
+          my $pre = $parts[$p - 2];
+          if (not $pre eq '') {
+            $part = $lcp;
+            next;
+          }
+        } elsif (@parts+0 == 1) {
+          if (not $start) {
+            $part = $lcp;
+            next;
+          }
+        }
+      }
+      $part =~ s/^([a-z])/\U$1/g;
     }
-    if (index($word, '-') != -1) {
-      $word =~ s/-([a-z])/-\U$1/g;
-    }
+    $word = join('-', @parts);
   }
   return join(' ', @words);
 }
@@ -628,6 +707,8 @@
 sub fix_title {
   my ($value) = @_;
   $value = fix_capitalization($value);
+  $value =~ s/([^ ])---/$1 ---/g;
+  $value =~ s/---([^ ])/--- $1/g;
   return $value;
 }
 
@@ -653,7 +734,9 @@
   if ($left !~ /^[0-9]*$/ or $right !~ /^[0-9]*$/) {
     return $value;
   }
-  if ($left + 0 gt $right + 0) {
+  $left = $left + 0;
+  $right = $right + 0;
+  if ($left > $right) {
     my $tmp = $left;
     $left = $right;
     $right = $tmp;
@@ -675,6 +758,22 @@
   foreach my $org (@orgs) {
     $value =~ s/ \Q$org\E / /g;
   }
+  my %numbers = (
+    'First' => '1st',
+    'Second' => '2nd',
+    'Third' => '3rd',
+    'Fourth' => '4th',
+    'Fifth' => '5th',
+    'Sixth' => '6th',
+    'Seventh' => '7th',
+    'Eighth' => '8th',
+    'Nineth' => '9th',
+    'Tenth' => '10th'
+  );
+  keys %numbers;
+  while(my($left, $right) = each %numbers) {
+    $value =~ s/^Proceedings of the \Q$left\E /Proceedings of the $right /g;
+  }
   return $value;
 }
 
@@ -707,6 +806,28 @@
   return $value;
 }
 
+sub fix_unicode {
+  my ($value) = @_;
+  my %literals = (
+    'ò' => '\`{o}', 'ó' => '\\\'{o}', 'ô' => '\^{o}', 'ö' => '\"{o}', 'ő' => '\H{o}', 'ǒ' => '\v{o}', 'õ' => '\~{o}',
+    'à' => '\`{a}', 'á' => '\\\'{a}', 'â' => '\^{a}', 'ä' => '\"{a}', 'å' => '\r{a}', 'ą' => '\k{a}', 'ǎ' => '\v{a}', 'ã' => '\~{a}',
+    'ù' => '\`{u}', 'ú' => '\\\'{u}', 'û' => '\^{u}', 'ü' => '\"{u}', 'ů' => '\r{u}', 'ǔ' => '\v{u}', 'ũ' => '\~{u}',
+    'ì' => '\`{i}', 'í' => '\\\'{i}', 'î' => '\^{i}', 'ï' => '\"{i}', 'ǐ' => '\v{i}', 'ĩ' => '\~{i}',
+    'ń' => '\\\'{n}', 'ň' => '\v{n}', 'ñ' => '\~{n}',
+    'ç' => '\c{c}',
+    'ł' => '\l{}',
+    'ı' => '{\i}',
+    'ø' => '\o{}',
+    '–' => '--', '—' => '---',
+    '’' => '\''
+  );
+  keys %literals;
+  while(my($k, $v) = each %literals) {
+    $value =~ s/\Q$k\E/$v/g;
+  }
+  return $value;
+}
+
 # Parse the incoming .bib file and return an array
 # of hash-maps, where each one is a bibentry.
 sub entries {
@@ -832,7 +953,12 @@
 # Takes the text and returns only list of words seen there.
 sub only_words {
   my ($tex) = @_;
-  return split(/[ \-]/, clean_tex($tex));
+  my $t = clean_tex($tex);
+  $t =~ s/([^a-zA-Z0-9\\'])/ $1 /g;
+  $t =~ s/- +- +-/---/g;
+  $t =~ s/{ /{/g;
+  $t =~ s/ }/}/g;
+  return split(/ +/, $t);
 }
 
 # Take a TeX string and return a cleaner one, without redundant spaces, brackets, etc.
@@ -958,7 +1084,7 @@
     "      --latex     Report errors in LaTeX format using \\PackageWarningNoLine command\n\n" .
     "If any issues, report to GitHub: https://github.com/yegor256/bibcop");
 } elsif (exists $args{'--version'} or exists $args{'-v'}) {
-  info('0.0.20 2024-03-15');
+  info('0.0.21 2024-05-03');
 } else {
   my ($file) = grep { not($_ =~ /^-.*$/) } @ARGV;
   if (not $file) {
@@ -997,7 +1123,7 @@
         $found += 1;
       }
     }
-    if ($found gt 0) {
+    if ($found > 0) {
       debug("$found problem(s) found");
       fail();
     }

Modified: trunk/Master/texmf-dist/source/bibtex/bibcop/bibcop.dtx
===================================================================
--- trunk/Master/texmf-dist/source/bibtex/bibcop/bibcop.dtx	2024-05-05 23:41:43 UTC (rev 71185)
+++ trunk/Master/texmf-dist/source/bibtex/bibcop/bibcop.dtx	2024-05-06 16:47:16 UTC (rev 71186)
@@ -50,7 +50,7 @@
 %<package>\NeedsTeXFormat{LaTeX2e}
 %<package>\ProvidesPackage{bibcop}
 %<*package>
-[2024-03-15 0.0.20 Style Checker of Bibliography Files]
+[2024-05-03 0.0.21 Style Checker of Bibliography Files]
 %</package>
 %<*driver>
 \documentclass{ltxdoc}

Modified: trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty	2024-05-05 23:41:43 UTC (rev 71185)
+++ trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty	2024-05-06 16:47:16 UTC (rev 71186)
@@ -31,7 +31,7 @@
 
 \NeedsTeXFormat{LaTeX2e}
 \ProvidesPackage{bibcop}
-[2024-03-15 0.0.20 Style Checker of Bibliography Files]
+[2024-05-03 0.0.21 Style Checker of Bibliography Files]
 
 
 



More information about the tex-live-commits mailing list.