texlive[65279] trunk: bibcop (15dec22)
commits+karl at tug.org
commits+karl at tug.org
Thu Dec 15 21:43:35 CET 2022
Revision: 65279
http://tug.org/svn/texlive?view=revision&revision=65279
Author: karl
Date: 2022-12-15 21:43:35 +0100 (Thu, 15 Dec 2022)
Log Message:
-----------
bibcop (15dec22)
Modified Paths:
--------------
trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl
trunk/Master/texmf-dist/doc/latex/bibcop/DEPENDS.txt
trunk/Master/texmf-dist/doc/latex/bibcop/README.md
trunk/Master/texmf-dist/doc/latex/bibcop/bibcop.pdf
trunk/Master/texmf-dist/doc/man/man1/bibcop.1
trunk/Master/texmf-dist/doc/man/man1/bibcop.man1.pdf
trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl
trunk/Master/texmf-dist/source/latex/bibcop/bibcop.dtx
trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty
Modified: trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Build/source/texk/texlive/linked_scripts/bibcop/bibcop.pl 2022-12-15 20:43:35 UTC (rev 65279)
@@ -32,36 +32,36 @@
# If you want to add an extra check, just create a new procedure
# named as "check_*".
-# Only these keys are allowed and only these types of items.
+# Only these tags are allowed and only these types of entries.
my %blessed = (
- 'article' => ['doi', 'year', 'title', 'author', 'journal', 'volume', 'number', 'publisher?'],
+ 'article' => ['doi', 'year', 'title', 'author', 'journal', 'volume', 'number', 'publisher?', 'pages?'],
'inproceedings' => ['doi', 'booktitle', 'title', 'author', 'year', 'pages?', 'organization?', 'volume?'],
'book' => ['doi', 'title', 'author', 'year', 'publisher'],
'misc' => ['title', 'author', 'year', 'eprint?', 'archiveprefix?', 'primaryclass?', 'publisher?', 'organization?', 'doi?'],
);
-# Check the presence of mandatory keys.
-sub check_mandatory_keys {
- my (%item) = @_;
- my $type = $item{':type'};
+# Check the presence of mandatory tags.
+sub check_mandatory_tags {
+ my (%entry) = @_;
+ my $type = $entry{':type'};
my $mandatory = $blessed{$type};
- foreach my $key (@$mandatory) {
- if ($key =~ /^.*\?$/) {
+ foreach my $tag (@$mandatory) {
+ if ($tag =~ /^.*\?$/) {
next;
}
- if (not(exists $item{$key})) {
- my $listed = listed_keys(%item);
- return "A mandatory '$key' key for '\@$type' is missing among $listed"
+ if (not(exists $entry{$tag})) {
+ my $listed = listed_tags(%entry);
+ return "A mandatory '$tag' tag for '\@$type' is missing among $listed"
}
}
if (exists $blessed{$type}) {
my %required = map { $_ => 1 } @$mandatory;
- foreach my $key (keys %item) {
- if ($key =~ /^:/) {
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:/) {
next;
}
- if (not(exists $required{$key}) && not(exists $required{$key . '?'})) {
- return "The '$key' key is not suitable for '$type', use only these: (@$mandatory)"
+ if (not(exists $required{$tag}) && not(exists $required{$tag . '?'})) {
+ return "The '$tag' tag is not suitable for '$type', use only these: (@$mandatory)"
}
}
}
@@ -69,14 +69,14 @@
# Check that all major words are capitalized.
sub check_capitalization {
- my (%item) = @_;
- my %keys = map { $_ => 1 } qw/title booktitle journal/;
+ my (%entry) = @_;
+ my %tags = map { $_ => 1 } qw/title booktitle journal publisher organization/;
my %minors = map { $_ => 1 } qw/in of at to by the a an and or as if up via yet nor but off on for into/;
- foreach my $key (keys %item) {
- if (not exists $keys{$key}) {
+ foreach my $tag (keys %entry) {
+ if (not exists $tags{$tag}) {
next;
}
- my $value = $item{$key};
+ my $value = $entry{$tag};
my @words = only_words($value);
my $pos = 0;
foreach my $word (@words) {
@@ -88,10 +88,10 @@
next;
}
if (exists $minors{lc($word)} and $pos gt 1) {
- return "All minor words in the '$key' must be lower-cased, while '$word' (no.$pos) is not"
+ return "All minor words in the '$tag' must be lower-cased, while '$word' (no.$pos) is not"
}
if ($word =~ /^[a-z].*/) {
- return "All major words in the '$key' must be capitalized, while '$word' (no.$pos) is not"
+ return "All major words in the '$tag' must be capitalized, while '$word' (no.$pos) is not"
}
}
}
@@ -99,9 +99,9 @@
# Check that the 'author' is formatted correctly.
sub check_author {
- my (%item) = @_;
- if (exists $item{'author'} and not $item{'author'} =~ /^\{.+\}$/) {
- my $author = clean_tex($item{'author'});
+ my (%entry) = @_;
+ if (exists $entry{'author'} and not $entry{'author'} =~ /^\{.+\}$/) {
+ my $author = clean_tex($entry{'author'});
if (not $author =~ /^[A-Z][^ ]+(,( [A-Z][^ ]+)+)?( and [A-Z][^ ]+(,( [A-Z][^ ]+)+)?)*( and others)?$/) {
return "The format of the 'author' is wrong, use something like 'Knuth, Donald E. and Duane, Bibby'"
}
@@ -113,13 +113,13 @@
# Check that titles don't have shortened words with a trailing dot.
sub check_shortenings {
- my (%item) = @_;
- my %keys = map { $_ => 1 } qw/title booktitle journal/;
- foreach my $key (keys %item) {
- if (not exists $keys{$key}) {
+ my (%entry) = @_;
+ my %tags = map { $_ => 1 } qw/title booktitle journal/;
+ foreach my $tag (keys %entry) {
+ if (not exists $tags{$tag}) {
next;
}
- my $value = $item{$key};
+ my $value = $entry{$tag};
my @words = only_words($value);
foreach my $word (@words) {
if (not $word =~ /^[A-Za-z]/) {
@@ -126,7 +126,7 @@
next;
}
if ($word =~ /^.*\.$/) {
- return "Do not shorten the words in the '$key', such as '$word'"
+ return "Do not shorten the words in the '$tag', such as '$word'"
}
}
}
@@ -134,63 +134,139 @@
# Check the right format of the 'title' and 'booktitle.'
sub check_titles {
- my (%item) = @_;
- my @keys = qw/title booktitle/;
- foreach my $key (@keys) {
- if (not exists($item{$key})) {
+ my (%entry) = @_;
+ my @tags = qw/title booktitle/;
+ foreach my $tag (@tags) {
+ if (not exists($entry{$tag})) {
next;
}
- my $title = $item{$key};
+ my $title = $entry{$tag};
if (not $title =~ /^\{.+\}$/) {
- return "The '$key' must be wrapped in double curled brackets"
+ return "The '$tag' must be wrapped in double curled brackets"
}
}
}
+# Check the right format of the tags for arXiv.
+# See https://arxiv.org/help/arxiv_identifier
+sub check_arXiv {
+ my (%entry) = @_;
+ if (exists($entry{'archiveprefix'})) {
+ if (not exists $entry{'eprint'}) {
+ return "The 'eprint' is mandatory when 'archiveprefix' is there"
+ }
+ if (not $entry{'eprint'} =~ /^[0-9]{4}\.[0-9]{4,5}(v[0-9]+)?$/) {
+ return "The 'eprint' must have two integers separated by a dot"
+ }
+ my $eprint = $entry{'eprint'};
+ my ($head, $tail) = split(/\./, $eprint);
+ my $year = substr($head, 0, 2);
+ my $month = substr($head, 2);
+ if ($month > 12) {
+ return "The month '$month' of the 'eprint' is wrong, it can't be bigger than 12"
+ }
+ if (not exists $entry{'primaryclass'}) {
+ return "The 'primaryclass' is mandatory when 'archiveprefix' is there"
+ }
+ if (not $entry{'primaryclass'} =~ /^[a-z]{2,}\.[A-Z]{2}$/) {
+ return "The 'primaryclass' must have two parts, like 'cs.PL'"
+ }
+ }
+}
+
+# Check that organization is not mentioned in the booktitle.
+sub check_org_in_booktitle {
+ my (%entry) = @_;
+ my @orgs = ( 'ACM', 'IEEE' );
+ if (exists($entry{'booktitle'})) {
+ my $title = $entry{'booktitle'};
+ foreach my $o (@orgs) {
+ if ($title =~ /^.*\Q$o\E.*$/) {
+ return "The '$o' organization must not be mentioned in the booktitle, use 'publisher' tag instead"
+ }
+ }
+ if ($title =~ /^.*(ACM|IEEE).*$/) {
+ return "Don't mention the"
+ }
+ }
+}
+
# Check that no values have trailing dots.
# Check that there are no spaces before commas.
sub check_typography {
- my (%item) = @_;
- foreach my $key (keys %item) {
- if ($key =~ /^:.*/) {
+ my (%entry) = @_;
+ my %symbols = (
+ '.' => 'dot',
+ ',' => 'comma',
+ ';' => 'semi-colon',
+ ':' => 'colon',
+ '!' => 'exclamation mark',
+ '?' => 'question mark',
+ '-' => 'dash',
+ '--' => 'double dash',
+ '---' => 'triple dash',
+ );
+ my @spaces_around = ( '---' );
+ my @no_spaces_around = ( '--', '-' );
+ my @no_space_before = ( '.', ',', ';', ':', '?', '!' );
+ my @bad_tails = ( '.', ',', ';', ':', '-' );
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:.*/) {
next;
}
- my $value = $item{$key};
- if ($value =~ /.*\.$/ and $key ne 'author') {
- return "The '$key' must not end with a dot"
+ my $value = $entry{$tag};
+ foreach my $s (@bad_tails) {
+ if ($s eq '.' and $tag eq 'author') {
+ next;
+ }
+ if ($value =~ /^.*\Q$s\E$/) {
+ return "The '$tag' must not end with a $symbols{$s}"
+ }
}
- if ($value =~ /.* ,.*/) {
- return "In the '$key', do not put a space before the comma"
+ foreach my $s (@no_space_before) {
+ if ($value =~ /^.*\s\Q$s\E.*$/) {
+ return "In the '$tag', do not put a space before a $symbols{$s}"
+ }
}
+ foreach my $s (@spaces_around) {
+ if ($value =~ /^.*[^\s]\Q$s\E.*$/ or $value =~ /^.*\Q$s\E[^\s].*$/) {
+ return "In the '$tag', put spaces around a $symbols{$s}"
+ }
+ }
+ foreach my $s (@no_spaces_around) {
+ if ($value =~ /^.*\s\Q$s\E\s.*$/) {
+ return "In the '$tag', don't put spaces around a $symbols{$s}"
+ }
+ }
}
}
# Check the year is not mentioned in titles.
sub check_year_in_titles {
- my (%item) = @_;
- my @keys = qw/title booktitle journal/;
- foreach my $key (@keys) {
- if (not exists($item{$key})) {
+ my (%entry) = @_;
+ my @tags = qw/title booktitle journal/;
+ foreach my $tag (@tags) {
+ if (not exists($entry{$tag})) {
next;
}
- my @words = only_words($item{$key});
+ my @words = only_words($entry{$tag});
foreach my $word (@words) {
if ($word =~ /^[1-9][0-9]{3}$/) {
- return "The '$key' must not contain the year $word, it is enough to have the 'year' key"
+ return "The '$tag' must not contain the year $word, it is enough to have the 'year' tag"
}
}
}
}
-# Check the right format of the 'booktitle' in the 'inproceedings' item.
+# Check the right format of the 'booktitle' in the 'inproceedings' entry.
sub check_booktile_of_inproceedings {
- my (%item) = @_;
- my $key = 'inproceedings';
- if ($item{':type'} eq $key) {
- if (exists $item{'booktitle'}) {
- my @words = only_words($item{'booktitle'});
+ my (%entry) = @_;
+ my $tag = 'inproceedings';
+ if ($entry{':type'} eq $tag) {
+ if (exists $entry{'booktitle'}) {
+ my @words = only_words($entry{'booktitle'});
if (lc($words[0]) ne 'proceedings' or lc($words[1]) ne 'of' or lc($words[2]) ne 'the') {
- return "The '$key' must start with 'Proceedings of the ...'"
+ return "The '$tag' must start with 'Proceedings of the ...'"
}
}
}
@@ -198,10 +274,10 @@
# Check the right format of the 'doi.'
sub check_doi {
- my (%item) = @_;
- if (exists $item{'doi'}) {
- my $doi = $item{'doi'};
- if (not $item{'doi'} =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-]+$/) {
+ my (%entry) = @_;
+ if (exists $entry{'doi'}) {
+ my $doi = $entry{'doi'};
+ if (not $doi =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-)(]+$/) {
return "The format of the 'doi' is wrong"
}
}
@@ -209,10 +285,10 @@
# Check the right format of the 'year.'
sub check_year {
- my (%item) = @_;
- if (exists $item{'year'}) {
- my $year = $item{'year'};
- if (not $item{'year'} =~ /^[0-9]{3,4}$/) {
+ my (%entry) = @_;
+ if (exists $entry{'year'}) {
+ my $year = $entry{'year'};
+ if (not $year =~ /^[0-9]{3,4}$/) {
return "The format of the 'year' is wrong"
}
}
@@ -220,10 +296,10 @@
# Check the right format of the 'month.'
sub check_month {
- my (%item) = @_;
- if (exists $item{'month'}) {
- my $month = $item{'month'};
- if (not $item{'month'} =~ /^[1-9]|10|11|12$/) {
+ my (%entry) = @_;
+ if (exists $entry{'month'}) {
+ my $month = $entry{'month'};
+ if (not $month =~ /^[1-9]|10|11|12$/) {
return "The format of the 'month' is wrong"
}
}
@@ -231,10 +307,10 @@
# Check the right format of the 'volume.'
sub check_volume {
- my (%item) = @_;
- if (exists $item{'volume'}) {
- my $volume = $item{'volume'};
- if (not $item{'volume'} =~ /^[1-9][0-9]*$/) {
+ my (%entry) = @_;
+ if (exists $entry{'volume'}) {
+ my $volume = $entry{'volume'};
+ if (not $volume =~ /^[1-9][0-9]*$/) {
return "The format of the 'volume' is wrong"
}
}
@@ -242,10 +318,10 @@
# Check the right format of the 'number.'
sub check_number {
- my (%item) = @_;
- if (exists $item{'number'}) {
- my $number = $item{'number'};
- if (not $item{'number'} =~ /^[1-9][0-9]*$/) {
+ my (%entry) = @_;
+ if (exists $entry{'number'}) {
+ my $number = $entry{'number'};
+ if (not $number =~ /^[1-9][0-9]*$/) {
return "The format of the 'number' is wrong"
}
}
@@ -253,14 +329,17 @@
# Check the right format of the 'pages.'
sub check_pages {
- my (%item) = @_;
- if (exists $item{'pages'}) {
- my $pages = $item{'pages'};
- if (not $item{'pages'} =~ /^[1-9][0-9]*--[1-9][0-9]*|[1-9][0-9]*$/) {
+ my (%entry) = @_;
+ if (exists $entry{'pages'}) {
+ my $pages = $entry{'pages'};
+ if (not $pages =~ /^([1-9][0-9]*--[1-9][0-9]*|[1-9][0-9]*)$/) {
return "The format of the 'pages' is wrong"
}
my @parts = split(/--/, $pages);
if (@parts+0 eq 2) {
+ if ($parts[0] eq $parts[1]) {
+ return "The 'pages' mentions the same page twice, just use it once"
+ }
if ($parts[0] gt $parts[1]) {
return "The 'pages' are in the wrong order"
}
@@ -268,9 +347,9 @@
}
}
-# Check one item.
-sub process_item {
- my (%item) = @_;
+# Check one entry.
+sub process_entry {
+ my (%entry) = @_;
my @checks;
foreach my $entry (keys %bibcop::) {
if ($entry =~ /^check_/) {
@@ -281,7 +360,7 @@
my @errors;
foreach my $check (@sorted) {
no strict 'refs';
- my $err = $check->(%item);
+ my $err = $check->(%entry);
if ($err ne '') {
push(@errors, $err);
}
@@ -290,14 +369,14 @@
}
# Parse the incoming .bib file and return an array
-# of hash-maps, where each one is a bibitem.
-sub bibitems {
+# of hash-maps, where each one is a bibentry.
+sub entries {
my ($bib) = @_;
- my @items;
+ my @entries;
my $s = 'top';
- my %item;
+ my %entry;
my $acc = '';
- my $key = '';
+ my $tag = '';
my $lineno = 0;
my $nest = 0;
my $escape = 0;
@@ -309,56 +388,56 @@
# ignore the EOL
$lineno = $lineno + 1;
} elsif ($char eq '@' and $s eq 'top') {
- %item = ();
+ %entry = ();
$s = 'start';
$acc = '';
} elsif ($char =~ /[a-z]/ and $s eq 'start') {
# @article
} elsif ($char eq '{' and $s eq 'start') {
- $item{':type'} = substr($acc, 1);
+ $entry{':type'} = substr($acc, 1);
$acc = '';
$s = 'body';
} elsif ($char =~ /[a-zA-Z]/ and $s eq 'body') {
$acc = '';
- $s = 'key';
- } elsif ($char =~ /[a-zA-Z0-9_]/ and $s eq 'key') {
- # reading the key
+ $s = 'tag';
+ } elsif ($char =~ /[a-zA-Z0-9_]/ and $s eq 'tag') {
+ # reading the tag
} elsif ($char =~ /[a-zA-Z0-9]/ and $s eq 'value') {
# reading the value without quotes or brackets
- } elsif ($char eq ',' and $s eq 'key') {
- $item{':name'} = $acc;
+ } elsif ($char eq ',' and $s eq 'tag') {
+ $entry{':name'} = $acc;
$s = 'body';
- } elsif ($char eq '=' and $s eq 'key') {
- $key = $acc;
+ } elsif ($char eq '=' and $s eq 'tag') {
+ $tag = $acc;
$s = 'value';
$acc = '';
} elsif ($char eq ',' and $s eq 'value') {
- if (not exists $item{lc($key)}) {
+ if (not exists $entry{lc($tag)}) {
my $tex = substr($acc, 1);
$tex =~ s/\s//g;
- $item{lc($key)} = $tex;
+ $entry{lc($tag)} = $tex;
}
$s = 'body';
} elsif ($char eq '}' and $s eq 'body') {
- push(@items, { %item });
+ push(@entries, { %entry });
$s = 'top';
} elsif ($char eq '}' and $s eq 'value') {
- if (not exists $item{lc($key)}) {
+ if (not exists $entry{lc($tag)}) {
my $tex = substr($acc, 1);
$tex =~ s/\s//g;
- $item{lc($key)} = $tex;
+ $entry{lc($tag)} = $tex;
}
- push(@items, { %item });
+ push(@entries, { %entry });
$s = 'top';
- } elsif ($char eq '}' and $s eq 'key') {
- $item{':name'} = $acc;
- push(@items, { %item });
+ } elsif ($char eq '}' and $s eq 'tag') {
+ $entry{':name'} = $acc;
+ push(@entries, { %entry });
$s = 'top';
} elsif ($char eq '"' and $s eq 'value') {
$s = 'quote';
$acc = '';
} elsif ($char eq '"' and $s eq 'quote') {
- $item{lc($key)} = substr($acc, 1);
+ $entry{lc($tag)} = substr($acc, 1);
$s = 'value';
} elsif ($s eq 'quote') {
# nothing
@@ -374,13 +453,13 @@
} elsif ($char eq '}' and $escape ne 1) {
$nest = $nest - 1;
if ($nest eq 0) {
- $item{lc($key)} = substr($acc, 1);
+ $entry{lc($tag)} = substr($acc, 1);
$s = 'value';
}
}
$escape = 0;
} else {
- print "\\PackageWarningNoLine{bibcop}{It is impossible to parse the .bib file, because I do not know what to do with '$char' at line #$lineno (s=$s)}\n";
+ warning("It is impossible to parse the .bib file, because I do not know what to do with '$char' at line #$lineno (s=$s)");
last;
}
if ($char eq ' ' and not($s =~ /quote|brackets/)) {
@@ -388,7 +467,7 @@
}
$acc = $acc . $char;
}
- return @items;
+ return @entries;
}
# Takes the text and returns only list of words seen there.
@@ -407,15 +486,15 @@
return $tex;
}
-# Take a bibitem and print all its keys as a comma-separated string.
-sub listed_keys {
- my (%item) = @_;
+# Take a bibentry and print all its tags as a comma-separated string.
+sub listed_tags {
+ my (%entry) = @_;
my @list;
- foreach my $key (keys %item) {
- if ($key =~ /^:.*/) {
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:.*/) {
next;
}
- push(@list, $key);
+ push(@list, $tag);
}
my @sorted = sort @list;
return '(' . join(', ', @sorted) . ')';
@@ -424,7 +503,11 @@
# Print ERROR message to the console and die.
sub error {
my ($txt) = @_;
- print $txt . "\n";
+ if (exists $args{'--latex'}) {
+ print "\\PackageError{bibcop}{$txt}\n";
+ } else {
+ print $txt . "\n";
+ }
exit 1;
}
@@ -431,6 +514,18 @@
# Print DEBUG message to the console.
sub debug {
my ($txt) = @_;
+ if (exists $args{'--verbose'}) {
+ if (exists $args{'--latex'}) {
+ print "\\message{bibcop: $txt^^J}\n";
+ } else {
+ print $txt . "\n";
+ }
+ }
+}
+
+# Print INFO message to the console.
+sub info {
+ my ($txt) = @_;
if (exists $args{'--latex'}) {
print '% ';
}
@@ -447,57 +542,64 @@
}
}
-if (@ARGV+0 eq 0 or exists $args{'--help'}) {
- debug("Bibcop is a Style Checker of .bib Files\n" .
- "Usage: bibcop [<options>] <.bib file path>\n" .
- " --version Print the current version of the tool and exit\n" .
- " --help Print this help screen\n" .
- " --fix Fix the errors and print a new version of the .bib file to the console\n" .
- " --latex Report errors in LaTeX format using \\PackageWarningNoLine command");
-} elsif (exists $args{'--version'}) {
- debug('0.0.3');
+if (@ARGV+0 eq 0 or exists $args{'--help'} or exists $args{'-?'}) {
+ info("Bibcop is a Style Checker of BibTeX Files\n\n" .
+ "Usage:\n" .
+ " bibcop [<options>] <.bib file path>\n\n" .
+ "Options:\n" .
+ " -v, --version Print the current version of the tool and exit\n" .
+ " -?, --help Print this help screen\n" .
+ " --fix Fix the errors and print a new version of the .bib file to the console\n" .
+ " --verbose Print supplementary debugging information\n" .
+ " --latex Report errors in LaTeX format using \\PackageWarningNoLine command\n\n" .
+ "If any issues, report to GitHub: https://github.com/yegor256/bibcop");
+} elsif (exists $args{'--version'} or exists $args{'-v'}) {
+ info('0.0.4');
} else {
my ($file) = grep { not($_ =~ /^--.*$/) } @ARGV;
+ if (not $file) {
+ error('File name must be specified');
+ }
open(my $fh, '<', $file);
my $bib; { local $/; $bib = <$fh>; }
- my @items = bibitems($bib);
+ my @entries = entries($bib);
if (exists $args{'--fix'}) {
- for my $i (0..(@items+0 - 1)) {
- my %item = %{ $items[$i] };
- my $type = $item{':type'};
+ for my $i (0..(@entries+0 - 1)) {
+ my %entry = %{ $entries[$i] };
+ my $type = $entry{':type'};
if (not exists $blessed{$type}) {
- error("I don't know what to do with \@$type type of bibitem");
+ error("I don't know what to do with \@$type type of bibentry");
}
- my $keys = $blessed{$item{':type'}};
- my %allowed = map { $_ => 1 } @$keys;
+ my $tags = $blessed{$entry{':type'}};
+ my %allowed = map { $_ => 1 } @$tags;
my @lines;
- foreach my $key (keys %item) {
- if ($key =~ /^:/) {
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:/) {
next;
}
- if (not exists $allowed{$key} and not exists $allowed{$key . '?'}) {
+ if (not exists $allowed{$tag} and not exists $allowed{$tag . '?'}) {
next;
}
- my $value = clean_tex($item{$key});
- if ($key =~ /title|booktitle|journal/) {
+ my $value = clean_tex($entry{$tag});
+ if ($tag =~ /title|booktitle|journal/) {
$value = '{' . $value . '}';
}
- push(@lines, " $key = {$value},");
+ push(@lines, " $tag = {$value},");
}
- debug("\@$type\{$item{':name'},");
+ info("\@$type\{$entry{':name'},");
my @sorted = sort @lines;
foreach my $line (@sorted) {
- debug($line);
+ info($line);
}
- debug("}\n");
+ info("}\n");
}
} else {
- debug((@items+0) . ' bibitems found in ' . $file);
- for my $i (0..(@items+0 - 1)) {
- my %item = %{ $items[$i] };
- debug("Checking $item{':name'} (#$i)...");
- foreach my $err (process_item(%item)) {
- warning("$err, in the '$item{':name'}' bibitem");
+ debug((@entries+0) . ' entries found in ' . $file);
+ for my $i (0..(@entries+0 - 1)) {
+ my %entry = %{ $entries[$i] };
+ debug("Checking $entry{':name'} (no.$i)...");
+ foreach my $err (process_entry(%entry)) {
+ warning("$err, in the '$entry{':name'}' bib entry");
}
}
}
Modified: trunk/Master/texmf-dist/doc/latex/bibcop/DEPENDS.txt
===================================================================
--- trunk/Master/texmf-dist/doc/latex/bibcop/DEPENDS.txt 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Master/texmf-dist/doc/latex/bibcop/DEPENDS.txt 2022-12-15 20:43:35 UTC (rev 65279)
@@ -1,2 +1,3 @@
hard iexec
-hard verbatimcopy
\ No newline at end of file
+hard verbatimcopy
+hard pgfopts
Modified: trunk/Master/texmf-dist/doc/latex/bibcop/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/bibcop/README.md 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Master/texmf-dist/doc/latex/bibcop/README.md 2022-12-15 20:43:35 UTC (rev 65279)
@@ -36,6 +36,14 @@
This command will read the `main.bib` file and create `fixed.bib`, which
will have the fixed and properly formatted content (well, to some extent).
+If you install the package using [`tlmgr`](https://www.tug.org/texlive/tlmgr.html),
+you should be able to use `bibcop` directly, without the necessity to mention Perl:
+
+```
+$ tlmgr install bibcop
+$ bibcop --help
+```
+
## How to Contribute
If you want to contribute yourself, make a fork, then create a branch,
Modified: trunk/Master/texmf-dist/doc/latex/bibcop/bibcop.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/man/man1/bibcop.1
===================================================================
--- trunk/Master/texmf-dist/doc/man/man1/bibcop.1 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Master/texmf-dist/doc/man/man1/bibcop.1 2022-12-15 20:43:35 UTC (rev 65279)
@@ -1,16 +1,16 @@
-.TH bibcop 1 "2022-12-13"
+.TH bibcop 1 "2022-12-15"
.SH NAME
-bibcop \- Style Checker and Fixer of .bib Files
+bibcop \- Style Checker and Fixer of BibTeX Files (.bib)
.SH SYNOPSIS
bibcop [<options>] <.bib file path>
.SH DESCRIPTION
-The bibcop tool is a Perl script for checking the quality of .bib
-files and automatically fixing found inconsistencies. It is not only
+The bibcop tool is a Perl script for checking the quality of BibTeX bibliography
+files (.bib) and automatically fixing found inconsistencies. It is not only
a command line tool, but a LaTeX package, which can check the quality
of a .bib file during the rendering of a LaTeX document. All found
issues are reported as LaTeX warnings.
.SH OPTIONS
-Various options apply
+Various options apply:
.IP --version
Print the version of the tool
.IP --help
@@ -19,10 +19,11 @@
Auto-fix all issues and print the new content to the console
.IP --latex
Print everything in LaTeX format
-.SH BUGS
+.IP --verbose
+Print debugging information too
.SH AUTHOR
Yegor Bugayenko (yegor256 at gmail.com)
-.PP
+.SH BUGS
Please log issues on the GitHub homepage:
https://github.com/yegor256/bibcop/issues.
.SH SEE ALSO
Modified: trunk/Master/texmf-dist/doc/man/man1/bibcop.man1.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Master/texmf-dist/scripts/bibcop/bibcop.pl 2022-12-15 20:43:35 UTC (rev 65279)
@@ -32,36 +32,36 @@
# If you want to add an extra check, just create a new procedure
# named as "check_*".
-# Only these keys are allowed and only these types of items.
+# Only these tags are allowed and only these types of entries.
my %blessed = (
- 'article' => ['doi', 'year', 'title', 'author', 'journal', 'volume', 'number', 'publisher?'],
+ 'article' => ['doi', 'year', 'title', 'author', 'journal', 'volume', 'number', 'publisher?', 'pages?'],
'inproceedings' => ['doi', 'booktitle', 'title', 'author', 'year', 'pages?', 'organization?', 'volume?'],
'book' => ['doi', 'title', 'author', 'year', 'publisher'],
'misc' => ['title', 'author', 'year', 'eprint?', 'archiveprefix?', 'primaryclass?', 'publisher?', 'organization?', 'doi?'],
);
-# Check the presence of mandatory keys.
-sub check_mandatory_keys {
- my (%item) = @_;
- my $type = $item{':type'};
+# Check the presence of mandatory tags.
+sub check_mandatory_tags {
+ my (%entry) = @_;
+ my $type = $entry{':type'};
my $mandatory = $blessed{$type};
- foreach my $key (@$mandatory) {
- if ($key =~ /^.*\?$/) {
+ foreach my $tag (@$mandatory) {
+ if ($tag =~ /^.*\?$/) {
next;
}
- if (not(exists $item{$key})) {
- my $listed = listed_keys(%item);
- return "A mandatory '$key' key for '\@$type' is missing among $listed"
+ if (not(exists $entry{$tag})) {
+ my $listed = listed_tags(%entry);
+ return "A mandatory '$tag' tag for '\@$type' is missing among $listed"
}
}
if (exists $blessed{$type}) {
my %required = map { $_ => 1 } @$mandatory;
- foreach my $key (keys %item) {
- if ($key =~ /^:/) {
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:/) {
next;
}
- if (not(exists $required{$key}) && not(exists $required{$key . '?'})) {
- return "The '$key' key is not suitable for '$type', use only these: (@$mandatory)"
+ if (not(exists $required{$tag}) && not(exists $required{$tag . '?'})) {
+ return "The '$tag' tag is not suitable for '$type', use only these: (@$mandatory)"
}
}
}
@@ -69,14 +69,14 @@
# Check that all major words are capitalized.
sub check_capitalization {
- my (%item) = @_;
- my %keys = map { $_ => 1 } qw/title booktitle journal/;
+ my (%entry) = @_;
+ my %tags = map { $_ => 1 } qw/title booktitle journal publisher organization/;
my %minors = map { $_ => 1 } qw/in of at to by the a an and or as if up via yet nor but off on for into/;
- foreach my $key (keys %item) {
- if (not exists $keys{$key}) {
+ foreach my $tag (keys %entry) {
+ if (not exists $tags{$tag}) {
next;
}
- my $value = $item{$key};
+ my $value = $entry{$tag};
my @words = only_words($value);
my $pos = 0;
foreach my $word (@words) {
@@ -88,10 +88,10 @@
next;
}
if (exists $minors{lc($word)} and $pos gt 1) {
- return "All minor words in the '$key' must be lower-cased, while '$word' (no.$pos) is not"
+ return "All minor words in the '$tag' must be lower-cased, while '$word' (no.$pos) is not"
}
if ($word =~ /^[a-z].*/) {
- return "All major words in the '$key' must be capitalized, while '$word' (no.$pos) is not"
+ return "All major words in the '$tag' must be capitalized, while '$word' (no.$pos) is not"
}
}
}
@@ -99,9 +99,9 @@
# Check that the 'author' is formatted correctly.
sub check_author {
- my (%item) = @_;
- if (exists $item{'author'} and not $item{'author'} =~ /^\{.+\}$/) {
- my $author = clean_tex($item{'author'});
+ my (%entry) = @_;
+ if (exists $entry{'author'} and not $entry{'author'} =~ /^\{.+\}$/) {
+ my $author = clean_tex($entry{'author'});
if (not $author =~ /^[A-Z][^ ]+(,( [A-Z][^ ]+)+)?( and [A-Z][^ ]+(,( [A-Z][^ ]+)+)?)*( and others)?$/) {
return "The format of the 'author' is wrong, use something like 'Knuth, Donald E. and Duane, Bibby'"
}
@@ -113,13 +113,13 @@
# Check that titles don't have shortened words with a trailing dot.
sub check_shortenings {
- my (%item) = @_;
- my %keys = map { $_ => 1 } qw/title booktitle journal/;
- foreach my $key (keys %item) {
- if (not exists $keys{$key}) {
+ my (%entry) = @_;
+ my %tags = map { $_ => 1 } qw/title booktitle journal/;
+ foreach my $tag (keys %entry) {
+ if (not exists $tags{$tag}) {
next;
}
- my $value = $item{$key};
+ my $value = $entry{$tag};
my @words = only_words($value);
foreach my $word (@words) {
if (not $word =~ /^[A-Za-z]/) {
@@ -126,7 +126,7 @@
next;
}
if ($word =~ /^.*\.$/) {
- return "Do not shorten the words in the '$key', such as '$word'"
+ return "Do not shorten the words in the '$tag', such as '$word'"
}
}
}
@@ -134,63 +134,139 @@
# Check the right format of the 'title' and 'booktitle.'
sub check_titles {
- my (%item) = @_;
- my @keys = qw/title booktitle/;
- foreach my $key (@keys) {
- if (not exists($item{$key})) {
+ my (%entry) = @_;
+ my @tags = qw/title booktitle/;
+ foreach my $tag (@tags) {
+ if (not exists($entry{$tag})) {
next;
}
- my $title = $item{$key};
+ my $title = $entry{$tag};
if (not $title =~ /^\{.+\}$/) {
- return "The '$key' must be wrapped in double curled brackets"
+ return "The '$tag' must be wrapped in double curled brackets"
}
}
}
+# Check the right format of the tags for arXiv.
+# See https://arxiv.org/help/arxiv_identifier
+sub check_arXiv {
+ my (%entry) = @_;
+ if (exists($entry{'archiveprefix'})) {
+ if (not exists $entry{'eprint'}) {
+ return "The 'eprint' is mandatory when 'archiveprefix' is there"
+ }
+ if (not $entry{'eprint'} =~ /^[0-9]{4}\.[0-9]{4,5}(v[0-9]+)?$/) {
+ return "The 'eprint' must have two integers separated by a dot"
+ }
+ my $eprint = $entry{'eprint'};
+ my ($head, $tail) = split(/\./, $eprint);
+ my $year = substr($head, 0, 2);
+ my $month = substr($head, 2);
+ if ($month > 12) {
+ return "The month '$month' of the 'eprint' is wrong, it can't be bigger than 12"
+ }
+ if (not exists $entry{'primaryclass'}) {
+ return "The 'primaryclass' is mandatory when 'archiveprefix' is there"
+ }
+ if (not $entry{'primaryclass'} =~ /^[a-z]{2,}\.[A-Z]{2}$/) {
+ return "The 'primaryclass' must have two parts, like 'cs.PL'"
+ }
+ }
+}
+
+# Check that organization is not mentioned in the booktitle.
+sub check_org_in_booktitle {
+ my (%entry) = @_;
+ my @orgs = ( 'ACM', 'IEEE' );
+ if (exists($entry{'booktitle'})) {
+ my $title = $entry{'booktitle'};
+ foreach my $o (@orgs) {
+ if ($title =~ /^.*\Q$o\E.*$/) {
+ return "The '$o' organization must not be mentioned in the booktitle, use 'publisher' tag instead"
+ }
+ }
+ if ($title =~ /^.*(ACM|IEEE).*$/) {
+ return "Don't mention the"
+ }
+ }
+}
+
# Check that no values have trailing dots.
# Check that there are no spaces before commas.
sub check_typography {
- my (%item) = @_;
- foreach my $key (keys %item) {
- if ($key =~ /^:.*/) {
+ my (%entry) = @_;
+ my %symbols = (
+ '.' => 'dot',
+ ',' => 'comma',
+ ';' => 'semi-colon',
+ ':' => 'colon',
+ '!' => 'exclamation mark',
+ '?' => 'question mark',
+ '-' => 'dash',
+ '--' => 'double dash',
+ '---' => 'triple dash',
+ );
+ my @spaces_around = ( '---' );
+ my @no_spaces_around = ( '--', '-' );
+ my @no_space_before = ( '.', ',', ';', ':', '?', '!' );
+ my @bad_tails = ( '.', ',', ';', ':', '-' );
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:.*/) {
next;
}
- my $value = $item{$key};
- if ($value =~ /.*\.$/ and $key ne 'author') {
- return "The '$key' must not end with a dot"
+ my $value = $entry{$tag};
+ foreach my $s (@bad_tails) {
+ if ($s eq '.' and $tag eq 'author') {
+ next;
+ }
+ if ($value =~ /^.*\Q$s\E$/) {
+ return "The '$tag' must not end with a $symbols{$s}"
+ }
}
- if ($value =~ /.* ,.*/) {
- return "In the '$key', do not put a space before the comma"
+ foreach my $s (@no_space_before) {
+ if ($value =~ /^.*\s\Q$s\E.*$/) {
+ return "In the '$tag', do not put a space before a $symbols{$s}"
+ }
}
+ foreach my $s (@spaces_around) {
+ if ($value =~ /^.*[^\s]\Q$s\E.*$/ or $value =~ /^.*\Q$s\E[^\s].*$/) {
+ return "In the '$tag', put spaces around a $symbols{$s}"
+ }
+ }
+ foreach my $s (@no_spaces_around) {
+ if ($value =~ /^.*\s\Q$s\E\s.*$/) {
+ return "In the '$tag', don't put spaces around a $symbols{$s}"
+ }
+ }
}
}
# Check the year is not mentioned in titles.
sub check_year_in_titles {
- my (%item) = @_;
- my @keys = qw/title booktitle journal/;
- foreach my $key (@keys) {
- if (not exists($item{$key})) {
+ my (%entry) = @_;
+ my @tags = qw/title booktitle journal/;
+ foreach my $tag (@tags) {
+ if (not exists($entry{$tag})) {
next;
}
- my @words = only_words($item{$key});
+ my @words = only_words($entry{$tag});
foreach my $word (@words) {
if ($word =~ /^[1-9][0-9]{3}$/) {
- return "The '$key' must not contain the year $word, it is enough to have the 'year' key"
+ return "The '$tag' must not contain the year $word, it is enough to have the 'year' tag"
}
}
}
}
-# Check the right format of the 'booktitle' in the 'inproceedings' item.
+# Check the right format of the 'booktitle' in the 'inproceedings' entry.
sub check_booktile_of_inproceedings {
- my (%item) = @_;
- my $key = 'inproceedings';
- if ($item{':type'} eq $key) {
- if (exists $item{'booktitle'}) {
- my @words = only_words($item{'booktitle'});
+ my (%entry) = @_;
+ my $tag = 'inproceedings';
+ if ($entry{':type'} eq $tag) {
+ if (exists $entry{'booktitle'}) {
+ my @words = only_words($entry{'booktitle'});
if (lc($words[0]) ne 'proceedings' or lc($words[1]) ne 'of' or lc($words[2]) ne 'the') {
- return "The '$key' must start with 'Proceedings of the ...'"
+ return "The '$tag' must start with 'Proceedings of the ...'"
}
}
}
@@ -198,10 +274,10 @@
# Check the right format of the 'doi.'
sub check_doi {
- my (%item) = @_;
- if (exists $item{'doi'}) {
- my $doi = $item{'doi'};
- if (not $item{'doi'} =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-]+$/) {
+ my (%entry) = @_;
+ if (exists $entry{'doi'}) {
+ my $doi = $entry{'doi'};
+ if (not $doi =~ /^[0-9a-zA-Z.]+\/[0-9a-zA-Z._\-)(]+$/) {
return "The format of the 'doi' is wrong"
}
}
@@ -209,10 +285,10 @@
# Check the right format of the 'year.'
sub check_year {
- my (%item) = @_;
- if (exists $item{'year'}) {
- my $year = $item{'year'};
- if (not $item{'year'} =~ /^[0-9]{3,4}$/) {
+ my (%entry) = @_;
+ if (exists $entry{'year'}) {
+ my $year = $entry{'year'};
+ if (not $year =~ /^[0-9]{3,4}$/) {
return "The format of the 'year' is wrong"
}
}
@@ -220,10 +296,10 @@
# Check the right format of the 'month.'
sub check_month {
- my (%item) = @_;
- if (exists $item{'month'}) {
- my $month = $item{'month'};
- if (not $item{'month'} =~ /^[1-9]|10|11|12$/) {
+ my (%entry) = @_;
+ if (exists $entry{'month'}) {
+ my $month = $entry{'month'};
+ if (not $month =~ /^[1-9]|10|11|12$/) {
return "The format of the 'month' is wrong"
}
}
@@ -231,10 +307,10 @@
# Check the right format of the 'volume.'
sub check_volume {
- my (%item) = @_;
- if (exists $item{'volume'}) {
- my $volume = $item{'volume'};
- if (not $item{'volume'} =~ /^[1-9][0-9]*$/) {
+ my (%entry) = @_;
+ if (exists $entry{'volume'}) {
+ my $volume = $entry{'volume'};
+ if (not $volume =~ /^[1-9][0-9]*$/) {
return "The format of the 'volume' is wrong"
}
}
@@ -242,10 +318,10 @@
# Check the right format of the 'number.'
sub check_number {
- my (%item) = @_;
- if (exists $item{'number'}) {
- my $number = $item{'number'};
- if (not $item{'number'} =~ /^[1-9][0-9]*$/) {
+ my (%entry) = @_;
+ if (exists $entry{'number'}) {
+ my $number = $entry{'number'};
+ if (not $number =~ /^[1-9][0-9]*$/) {
return "The format of the 'number' is wrong"
}
}
@@ -253,14 +329,17 @@
# Check the right format of the 'pages.'
sub check_pages {
- my (%item) = @_;
- if (exists $item{'pages'}) {
- my $pages = $item{'pages'};
- if (not $item{'pages'} =~ /^[1-9][0-9]*--[1-9][0-9]*|[1-9][0-9]*$/) {
+ my (%entry) = @_;
+ if (exists $entry{'pages'}) {
+ my $pages = $entry{'pages'};
+ if (not $pages =~ /^([1-9][0-9]*--[1-9][0-9]*|[1-9][0-9]*)$/) {
return "The format of the 'pages' is wrong"
}
my @parts = split(/--/, $pages);
if (@parts+0 eq 2) {
+ if ($parts[0] eq $parts[1]) {
+ return "The 'pages' mentions the same page twice, just use it once"
+ }
if ($parts[0] gt $parts[1]) {
return "The 'pages' are in the wrong order"
}
@@ -268,9 +347,9 @@
}
}
-# Check one item.
-sub process_item {
- my (%item) = @_;
+# Check one entry.
+sub process_entry {
+ my (%entry) = @_;
my @checks;
foreach my $entry (keys %bibcop::) {
if ($entry =~ /^check_/) {
@@ -281,7 +360,7 @@
my @errors;
foreach my $check (@sorted) {
no strict 'refs';
- my $err = $check->(%item);
+ my $err = $check->(%entry);
if ($err ne '') {
push(@errors, $err);
}
@@ -290,14 +369,14 @@
}
# Parse the incoming .bib file and return an array
-# of hash-maps, where each one is a bibitem.
-sub bibitems {
+# of hash-maps, where each one is a bibentry.
+sub entries {
my ($bib) = @_;
- my @items;
+ my @entries;
my $s = 'top';
- my %item;
+ my %entry;
my $acc = '';
- my $key = '';
+ my $tag = '';
my $lineno = 0;
my $nest = 0;
my $escape = 0;
@@ -309,56 +388,56 @@
# ignore the EOL
$lineno = $lineno + 1;
} elsif ($char eq '@' and $s eq 'top') {
- %item = ();
+ %entry = ();
$s = 'start';
$acc = '';
} elsif ($char =~ /[a-z]/ and $s eq 'start') {
# @article
} elsif ($char eq '{' and $s eq 'start') {
- $item{':type'} = substr($acc, 1);
+ $entry{':type'} = substr($acc, 1);
$acc = '';
$s = 'body';
} elsif ($char =~ /[a-zA-Z]/ and $s eq 'body') {
$acc = '';
- $s = 'key';
- } elsif ($char =~ /[a-zA-Z0-9_]/ and $s eq 'key') {
- # reading the key
+ $s = 'tag';
+ } elsif ($char =~ /[a-zA-Z0-9_]/ and $s eq 'tag') {
+ # reading the tag
} elsif ($char =~ /[a-zA-Z0-9]/ and $s eq 'value') {
# reading the value without quotes or brackets
- } elsif ($char eq ',' and $s eq 'key') {
- $item{':name'} = $acc;
+ } elsif ($char eq ',' and $s eq 'tag') {
+ $entry{':name'} = $acc;
$s = 'body';
- } elsif ($char eq '=' and $s eq 'key') {
- $key = $acc;
+ } elsif ($char eq '=' and $s eq 'tag') {
+ $tag = $acc;
$s = 'value';
$acc = '';
} elsif ($char eq ',' and $s eq 'value') {
- if (not exists $item{lc($key)}) {
+ if (not exists $entry{lc($tag)}) {
my $tex = substr($acc, 1);
$tex =~ s/\s//g;
- $item{lc($key)} = $tex;
+ $entry{lc($tag)} = $tex;
}
$s = 'body';
} elsif ($char eq '}' and $s eq 'body') {
- push(@items, { %item });
+ push(@entries, { %entry });
$s = 'top';
} elsif ($char eq '}' and $s eq 'value') {
- if (not exists $item{lc($key)}) {
+ if (not exists $entry{lc($tag)}) {
my $tex = substr($acc, 1);
$tex =~ s/\s//g;
- $item{lc($key)} = $tex;
+ $entry{lc($tag)} = $tex;
}
- push(@items, { %item });
+ push(@entries, { %entry });
$s = 'top';
- } elsif ($char eq '}' and $s eq 'key') {
- $item{':name'} = $acc;
- push(@items, { %item });
+ } elsif ($char eq '}' and $s eq 'tag') {
+ $entry{':name'} = $acc;
+ push(@entries, { %entry });
$s = 'top';
} elsif ($char eq '"' and $s eq 'value') {
$s = 'quote';
$acc = '';
} elsif ($char eq '"' and $s eq 'quote') {
- $item{lc($key)} = substr($acc, 1);
+ $entry{lc($tag)} = substr($acc, 1);
$s = 'value';
} elsif ($s eq 'quote') {
# nothing
@@ -374,13 +453,13 @@
} elsif ($char eq '}' and $escape ne 1) {
$nest = $nest - 1;
if ($nest eq 0) {
- $item{lc($key)} = substr($acc, 1);
+ $entry{lc($tag)} = substr($acc, 1);
$s = 'value';
}
}
$escape = 0;
} else {
- print "\\PackageWarningNoLine{bibcop}{It is impossible to parse the .bib file, because I do not know what to do with '$char' at line #$lineno (s=$s)}\n";
+ warning("It is impossible to parse the .bib file, because I do not know what to do with '$char' at line #$lineno (s=$s)");
last;
}
if ($char eq ' ' and not($s =~ /quote|brackets/)) {
@@ -388,7 +467,7 @@
}
$acc = $acc . $char;
}
- return @items;
+ return @entries;
}
# Takes the text and returns only the list of words seen there.
@@ -407,15 +486,15 @@
return $tex;
}
-# Take a bibitem and print all its keys as a comma-separated string.
-sub listed_keys {
- my (%item) = @_;
+# Take a bibentry and print all its tags as a comma-separated string.
+sub listed_tags {
+ my (%entry) = @_;
my @list;
- foreach my $key (keys %item) {
- if ($key =~ /^:.*/) {
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:.*/) {
next;
}
- push(@list, $key);
+ push(@list, $tag);
}
my @sorted = sort @list;
return '(' . join(', ', @sorted) . ')';
@@ -424,7 +503,11 @@
# Print ERROR message to the console and die.
sub error {
my ($txt) = @_;
- print $txt . "\n";
+ if (exists $args{'--latex'}) {
+ print "\\PackageError{bibcop}{$txt}\n";
+ } else {
+ print $txt . "\n";
+ }
exit 1;
}
@@ -431,6 +514,18 @@
# Print DEBUG message to the console.
sub debug {
my ($txt) = @_;
+ if (exists $args{'--verbose'}) {
+ if (exists $args{'--latex'}) {
+ print "\\message{bibcop: $txt^^J}\n";
+ } else {
+ print $txt . "\n";
+ }
+ }
+}
+
+# Print INFO message to the console.
+sub info {
+ my ($txt) = @_;
if (exists $args{'--latex'}) {
print '% ';
}
@@ -447,57 +542,64 @@
}
}
-if (@ARGV+0 eq 0 or exists $args{'--help'}) {
- debug("Bibcop is a Style Checker of .bib Files\n" .
- "Usage: bibcop [<options>] <.bib file path>\n" .
- " --version Print the current version of the tool and exit\n" .
- " --help Print this help screen\n" .
- " --fix Fix the errors and print a new version of the .bib file to the console\n" .
- " --latex Report errors in LaTeX format using \\PackageWarningNoLine command");
-} elsif (exists $args{'--version'}) {
- debug('0.0.3');
+if (@ARGV+0 eq 0 or exists $args{'--help'} or exists $args{'-?'}) {
+ info("Bibcop is a Style Checker of BibTeX Files\n\n" .
+ "Usage:\n" .
+ " bibcop [<options>] <.bib file path>\n\n" .
+ "Options:\n" .
+ " -v, --version Print the current version of the tool and exit\n" .
+ " -?, --help Print this help screen\n" .
+ " --fix Fix the errors and print a new version of the .bib file to the console\n" .
+ " --verbose Print supplementary debugging information\n" .
+ " --latex Report errors in LaTeX format using \\PackageWarningNoLine command\n\n" .
+ "If any issues, report to GitHub: https://github.com/yegor256/bibcop");
+} elsif (exists $args{'--version'} or exists $args{'-v'}) {
+ info('0.0.4');
} else {
my ($file) = grep { not($_ =~ /^--.*$/) } @ARGV;
+ if (not $file) {
+ error('File name must be specified');
+ }
open(my $fh, '<', $file);
my $bib; { local $/; $bib = <$fh>; }
- my @items = bibitems($bib);
+ my @entries = entries($bib);
if (exists $args{'--fix'}) {
- for my $i (0..(@items+0 - 1)) {
- my %item = %{ $items[$i] };
- my $type = $item{':type'};
+ for my $i (0..(@entries+0 - 1)) {
+ my %entry = %{ $entries[$i] };
+ my $type = $entry{':type'};
if (not exists $blessed{$type}) {
- error("I don't know what to do with \@$type type of bibitem");
+ error("I don't know what to do with \@$type type of bibentry");
}
- my $keys = $blessed{$item{':type'}};
- my %allowed = map { $_ => 1 } @$keys;
+ my $tags = $blessed{$entry{':type'}};
+ my %allowed = map { $_ => 1 } @$tags;
my @lines;
- foreach my $key (keys %item) {
- if ($key =~ /^:/) {
+ foreach my $tag (keys %entry) {
+ if ($tag =~ /^:/) {
next;
}
- if (not exists $allowed{$key} and not exists $allowed{$key . '?'}) {
+ if (not exists $allowed{$tag} and not exists $allowed{$tag . '?'}) {
next;
}
- my $value = clean_tex($item{$key});
- if ($key =~ /title|booktitle|journal/) {
+ my $value = clean_tex($entry{$tag});
+ if ($tag =~ /title|booktitle|journal/) {
$value = '{' . $value . '}';
}
- push(@lines, " $key = {$value},");
+ push(@lines, " $tag = {$value},");
}
- debug("\@$type\{$item{':name'},");
+ info("\@$type\{$entry{':name'},");
my @sorted = sort @lines;
foreach my $line (@sorted) {
- debug($line);
+ info($line);
}
- debug("}\n");
+ info("}\n");
}
} else {
- debug((@items+0) . ' bibitems found in ' . $file);
- for my $i (0..(@items+0 - 1)) {
- my %item = %{ $items[$i] };
- debug("Checking $item{':name'} (#$i)...");
- foreach my $err (process_item(%item)) {
- warning("$err, in the '$item{':name'}' bibitem");
+ debug((@entries+0) . ' entries found in ' . $file);
+ for my $i (0..(@entries+0 - 1)) {
+ my %entry = %{ $entries[$i] };
+ debug("Checking $entry{':name'} (no.$i)...");
+ foreach my $err (process_entry(%entry)) {
+ warning("$err, in the '$entry{':name'}' bib entry");
}
}
}
Modified: trunk/Master/texmf-dist/source/latex/bibcop/bibcop.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/bibcop/bibcop.dtx 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Master/texmf-dist/source/latex/bibcop/bibcop.dtx 2022-12-15 20:43:35 UTC (rev 65279)
@@ -50,7 +50,7 @@
%<package>\NeedsTeXFormat{LaTeX2e}
%<package>\ProvidesPackage{bibcop}
%<*package>
-[2022-12-13 0.0.3 Style Checker of Bibliography Files]
+[2022-12-15 0.0.4 Style Checker of Bibliography Files]
%</package>
%<*driver>
\documentclass{ltxdoc}
@@ -68,9 +68,9 @@
\CodelineIndex
\RecordChanges
\begin{document}
- \DocInput{bibcop.dtx}
- \PrintChanges
- \PrintIndex
+ \DocInput{bibcop.dtx}
+ \PrintChanges
+ \PrintIndex
\end{document}
%</driver>
% \fi
@@ -88,8 +88,8 @@
% \section{Introduction}
%
-% This package scans your |.bib| files for style errors and emits
-% warning messages if any issues are found (make sure you use it before
+% This package scans a |.bib| file for style errors and emits
+% warning messages if any issues are found (the package must be included before
% all other bibliography related packages):
%\iffalse
%<*verb>
@@ -105,40 +105,57 @@
%\iffalse
%</verb>
%\fi
-% You may see warnings in the \TeX{} log --- fix the issues in the |main.bib| file
-% and the warnings will disappear.
+% Some warnings may be printed in the \TeX{} log.
+% Once the issues in the |main.bib| file are fixed, the warnings disappear.
-% If you use the |.sty| file (without installing it into the \TeX{} tree), don't forget
-% to also copy the |bibcop.pl| file --- it is the Perl script that does all the work
+% If the |.sty| file is used directly (without installing it into the \TeX{} tree),
+% the |bibcop.pl| file must also be placed next to it --- it is the Perl script that does all the work
% of checking your |.bib| files. The |.sty| is just a simple wrapper around it.
-% Make sure |\usepackage{bibcop}| stays right after |\usepackage{biblatex}|
-% (if you use it), otherwise
-% you won't see any warnings from |bibcop|.
+% The |\usepackage{bibcop}| must stay right after |\usepackage{biblatex}|
+% (if Bib\LaTeX{} is used), otherwise
+% there won't be any warnings from |bibcop|.
+% \section{Package Options}
+
+% It's possible to configure the behavior of the package with the help of a few package options:
+
+% \DescribeMacro{verbose}
+% The |verbose| package option prints all debugging messages to the \TeX{} log:
+%\iffalse
+%<*verb>
+%\fi
+\begin{verbatim}
+\usepackage[verbose]{bibcop}
+\end{verbatim}
+%\iffalse
+%</verb>
+%\fi
+
% \section{The Rules}
-% This is a more or less complete list of rules we enforce on a |.bib| file:
+% This is a more or less complete list of rules enforced on a |.bib| file:
% \DescribeMacro{types}
-% Only |@article|, |@book|, |@inproceedings|, and |@misc| types of bib items are allowed.
+% Only |@article|, |@book|, |@inproceedings|, and |@misc| types of \BibTeX{} entries are allowed.
% Everything else, like |@manual|, |@phdthesis|, and many others are simply prohibited.
% The mentioned four should be enough for everything.
-% \DescribeMacro{keys}
-% There are pretty limited lists of allowed keys for each type of bib item. The keys
+% \DescribeMacro{tags}
+% There are pretty limited lists of allowed tags for each type of \BibTeX{} entry. The tags
% that are not in the list are prohibited to use.
% \DescribeMacro{doi}
-% Every bib item must have the |doi| key, which is a unique
+% Every \BibTeX{} entry must have the |doi| tag, which is a unique
% \href{https://www.doi.org}{Digital Object Identifier}
% of the material that you reference. It seems to be a good practice, in order to avoid ambiguity,
-% to always mention the DOI. I would also recommend to use \href{https://ctan.org/pkg/iexec}{doi} package,
+% to always mention the DOI.
+% It is also recommended to use the \href{https://ctan.org/pkg/iexec}{doi} package,
% in order to make all ``|doi|'' fields turned into hyper links.
% \DescribeMacro{caps}
-% In |title|, |booktitle|, and |journal| keys, all major words must be capitalized,
-% as it is \href{https://apastyle.apa.org/style-grammar-guidelines/capitalization/title-case}{recommended by APA}:
+% In the |title|, |booktitle|, and |journal| tags, all major words must be capitalized,
+% as it is \href{https://apastyle.apa.org/style-grammar-guidelines/capitalization/title-case}{recommended} by APA:
%\iffalse
%<*verb>
%\fi
@@ -151,9 +168,9 @@
% Here, the leading ``|A|'' is capital because it opens the title.
% The word ``|for|'' and the article ``|a|'' are minor words, that's why they are in lower case.
% Both parts of the composite word ``|Data-Flow|'' are capitalized.
-% Sometimes you need to violate this rule and use custom capitalization, as it is done
-% by the author of the paper. In order to do this, wrap the words with custom capitalization
-% in curled brackets, for example:
+% Sometimes this rule may need to be violated, when there is custom capitalization, as it is done
+% by the author of the paper. In order to do this, the words with custom capitalization
+% must be wrapped in curly brackets, for example:
%\iffalse
%<*verb>
%\fi
@@ -190,7 +207,7 @@
% When first names are shortened to a single letter, it has to have a trailing dot.
% \DescribeMacro{shorts}
-% It is not allowed to shorten any words, for example this is illegal:
+% It is not allowed to shorten any words aside from the |author| tag, for example this is illegal:
%\iffalse
%<*verb>
%\fi
@@ -212,7 +229,7 @@
%\fi
% \DescribeMacro{brackets}
-% The |title|, |booktitle|, and |journal| must be wrapped with double brackets, for example:
+% The |title|, |booktitle|, and |journal| must be wrapped in double brackets, for example:
%\iffalse
%<*verb>
%\fi
@@ -237,8 +254,8 @@
%\iffalse
%</verb>
%\fi
-% The year should only be mentioned in the |year| key, nowhere else.
-% In the |year| key only numbers are allowed:
+% The year should only be mentioned in the |year| tag, nowhere else.
+% In the |year| tag only numbers are allowed:
%\iffalse
%<*verb>
%\fi
@@ -298,7 +315,7 @@
%\fi
% \DescribeMacro{proceedings}
-% The |booktitle| in the |@inproceedings| bib item must always start with ``|Proceedings| |of| |the|'',
+% The |booktitle| in the |@inproceedings| entry must always start with ``|Proceedings| |of| |the|'',
% as in this example:
%\iffalse
%<*verb>
@@ -311,11 +328,38 @@
%</verb>
%\fi
+% \DescribeMacro{arXiv}
+% If the |archivePrefix| is present, the |eprint| and the |primaryClass| must also be present and must adhere to the formatting principles of \href{https://arxiv.org/help/arxiv_identifier}{arXiv identifiers}:
+%\iffalse
+%<*verb>
+%\fi
+\begin{verbatim}
+@misc{bugayenko2021,
+ archivePrefix = {arXiv},
+ eprint = {2111.13384},
+ primaryClass = {cs.PL},
+}
+\end{verbatim}
+%\iffalse
+%</verb>
+%\fi
+
+% \DescribeMacro{typography}
+% All tags in each \BibTeX{} entry are checked for obeying the basic typography rules:
+% \begin{itemize}
+% \item No spaces are allowed in front of a comma, a semi-colon, a colon, a dot, a question mark, and an exclamation mark;
+% \item A text may not end with a dot, a comma, a semi-colon, a colon, or a dash;
+% \item A triple dash must be surrounded by spaces.
+% \end{itemize}
+
% \StopEventually{}
% \section{Implementation}
% \changes{0.0.1}{2022/12/11}{First draft.}
% \changes{0.0.2}{2022/12/12}{Documentation extended, more rules added.}
+% \changes{0.0.4}{2022/12/14}{Extra checks for the typography, together with more extensive Perl testing.}
+% \changes{0.0.4}{2022/12/14}{The \texttt{--verbose} option introduced, to enable debugging information only on demand.}
+% \changes{0.0.4}{2022/12/15}{Package options introduced, the \texttt{verbose} option enables detailed logging inside the \TeX{} log.}
% First, we include a few packages.
% We need \href{https://ctan.org/pkg/iexec}{iexec} for executing Perl scripts:
@@ -323,6 +367,16 @@
\RequirePackage{iexec}
% \end{macrocode}
+% Then, we process package options:
+% \begin{macrocode}
+\RequirePackage{pgfopts}
+\pgfkeys{
+ /bibcop/.cd,
+ verbose/.store in=\bibcop@verbose,
+}
+\ProcessPgfPackageOptions{/bibcop}
+% \end{macrocode}
+
% \begin{macro}{bibcop.pl}
% Then, we copy the Perl script using |\VerbatimCopy| from
% \href{https://ctan.org/pkg/verbatimcopy}{verbatimcopy}:
@@ -338,12 +392,14 @@
% \begin{macrocode}
\makeatletter
\ifdefined\bibliography
- \let\bibcop@oldbibliography\bibliography
- \renewcommand\bibliography[1]{%
- \iexec{perl "./bibcop.tmp.pl" --latex '#1.bib'}%
+ \let\bibcop@oldbibliography\bibliography
+ \renewcommand\bibliography[1]{%
+ \iexec{perl "./bibcop.tmp.pl"\space
+ \ifdefined\bibcop@verbose--verbose\fi\space
+ --latex '#1.bib'}%
\message{bibcop: style checking finished^^J}%
- \bibcop@oldbibliography{#1}%
- }
+ \bibcop@oldbibliography{#1}%
+ }
\fi
\makeatother
% \end{macrocode}
@@ -354,17 +410,23 @@
% \begin{macrocode}
\makeatletter
\ifdefined\addbibresource
- \let\bibcop@oldaddbibresource\addbibresource
- \renewcommand\addbibresource[1]{%
- \iexec{perl "./bibcop.tmp.pl" --latex '#1'}%
+ \let\bibcop@oldaddbibresource\addbibresource
+ \renewcommand\addbibresource[1]{%
+ \iexec{perl "./bibcop.tmp.pl"\space
+ \ifdefined\bibcop@verbose--verbose\fi\space
+ --latex '#1'}%
\message{bibcop: style checking finished^^J}%
- \bibcop@oldaddbibresource{#1}%
- }
+ \bibcop@oldaddbibresource{#1}%
+ }
\fi
\makeatother
% \end{macrocode}
% \end{macro}
+% \begin{macrocode}
+\endinput
+% \end{macrocode}
+
% \Finale
% \clearpage
Modified: trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty 2022-12-15 20:43:14 UTC (rev 65278)
+++ trunk/Master/texmf-dist/tex/latex/bibcop/bibcop.sty 2022-12-15 20:43:35 UTC (rev 65279)
@@ -31,7 +31,7 @@
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{bibcop}
-[2022-12-13 0.0.3 Style Checker of Bibliography Files]
+[2022-12-15 0.0.4 Style Checker of Bibliography Files]
@@ -54,8 +54,20 @@
+
+
+
+
+
\RequirePackage{iexec}
+\RequirePackage{pgfopts}
+\pgfkeys{
+ /bibcop/.cd,
+ verbose/.store in=\bibcop@verbose,
+}
+\ProcessPgfPackageOptions{/bibcop}
+
\RequirePackage{verbatimcopy}
\VerbatimCopy{bibcop.pl}{bibcop.tmp.pl}
\message{bibcop: File with Perl script 'bibcop.pl' copied^^J}%
@@ -62,28 +74,30 @@
\makeatletter
\ifdefined\bibliography
-\let\bibcop@oldbibliography\bibliography
-\renewcommand\bibliography[1]{%
- \iexec{perl "./bibcop.tmp.pl" --latex '#1.bib'}%
+ \let\bibcop@oldbibliography\bibliography
+ \renewcommand\bibliography[1]{%
+ \iexec{perl "./bibcop.tmp.pl"\space
+ \ifdefined\bibcop@verbose--verbose\fi\space
+ --latex '#1.bib'}%
\message{bibcop: style checking finished^^J}%
- \bibcop@oldbibliography{#1}%
-}
+ \bibcop@oldbibliography{#1}%
+ }
\fi
\makeatother
\makeatletter
\ifdefined\addbibresource
-\let\bibcop@oldaddbibresource\addbibresource
-\renewcommand\addbibresource[1]{%
- \iexec{perl "./bibcop.tmp.pl" --latex '#1'}%
+ \let\bibcop@oldaddbibresource\addbibresource
+ \renewcommand\addbibresource[1]{%
+ \iexec{perl "./bibcop.tmp.pl"\space
+ \ifdefined\bibcop@verbose--verbose\fi\space
+ --latex '#1'}%
\message{bibcop: style checking finished^^J}%
- \bibcop@oldaddbibresource{#1}%
-}
+ \bibcop@oldaddbibresource{#1}%
+ }
\fi
\makeatother
-
-
\endinput
%%
%% End of file `bibcop.sty'.
More information about the tex-live-commits
mailing list.