[l2h] Escape braces in regex
Shigeharu TAKENO
shige at iee.niit.ac.jp
Tue Jul 24 10:20:18 CEST 2018
shige 07/24 2018
----------------
Recent versions of Perl (especially Perl 5.26) may warn "Unescaped left
brace in regex ...". I have therefore made a patch that escapes the braces
in the regexes that can trigger this warning (latex2html.pin,
styles/alltt.perl, styles/graphics-support.perl, versions/html3_1.pl,
versions/math.pl), and that also fixes a bug in a regex in
styles/frames.perl. The patch is against latex2html-2018.
----- From here -----
diff -uN latex2html-2018/latex2html.pin.ORG latex2html-2018/latex2html.pin
--- latex2html-2018/latex2html.pin.ORG 2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/latex2html.pin 2018-07-24 17:11:23.441258000 +0900
@@ -1406,8 +1406,8 @@
&replace_html_special_chars;
# Remove fake environment which should be invisible to LaTeX2HTML.
s/\001//m;
- s/[%]end\s*{latexonly}/\001/gom;
- s/[%]begin\s*{latexonly}([^\001]*)\001/%/gos;
+ s/[%]end\s*\{latexonly\}/\001/gom;
+ s/[%]begin\s*\{latexonly\}([^\001]*)\001/%/gos;
s/\001//m;
&preprocess_alltt if defined(&preprocess_alltt);
@@ -1431,13 +1431,13 @@
&write_mydb("verbatim", $global{'verbatim_counter'}, $3);
"$1$comment_mark".$global{'verbatim_counter'}."\n"/mge;
# Remove the htmlonly-environment
- s/\\begin\s*{htmlonly}\s*\n?//gom;
- s/\\end\s*{htmlonly}\s*\n?//gom;
+ s/\\begin\s*\{htmlonly\}\s*\n?//gom;
+ s/\\end\s*\{htmlonly\}\s*\n?//gom;
# Remove enviroments which should be invisible to LaTeX2HTML.
- s/\n[^%\n]*\\end\s*{latexonly}\s*\n?/\001/gom;
- s/((^|\n)[^%\n]*)\\begin\s*{latexonly}([^\001]*)\001/$1/gom;
- s/\\end\s*{comment}\s*\n?/\001/gom;
- s/\\begin\s*{comment}([^\001]*)\001//gom;
+ s/\n[^%\n]*\\end\s*\{latexonly\}\s*\n?/\001/gom;
+ s/((^|\n)[^%\n]*)\\begin\s*\{latexonly\}([^\001]*)\001/$1/gom;
+ s/\\end\s*\{comment\}\s*\n?/\001/gom;
+ s/\\begin\s*\{comment\}([^\001]*)\001//gom;
# this used to be earlier, but that can create problems with comments
&wrap_other_environments if (%other_environments);
@@ -1905,14 +1905,14 @@
# MRO: removed deprecated $*, replaced by option /m
$_[0] =~ s/(^|[^\\])\\\{/$1tex2html_escaped_opening_bracket/gom;
$_[0] =~ s/(^|[^\\])\\\{/$1tex2html_escaped_opening_bracket/gom; # repeat this
- $_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom;
- $_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom; # repeat this
+ $_[0] =~ s/(^|[^\\])\\\}/$1tex2html_escaped_closing_bracket/gom;
+ $_[0] =~ s/(^|[^\\])\\\}/$1tex2html_escaped_closing_bracket/gom; # repeat this
my $id = $global{'max_id'};
my $prev_id = $id;
# mark all balanced braces
# MRO: This should in fact mark all of them as the hierarchy is
# processed inside-out.
- 1 while($_[0] =~ s/{([^{}]*)}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo);
+ 1 while($_[0] =~ s/\{([^{}]*)\}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo);
# What follows seems esoteric...
my @processedB = ();
# Take one opening brace at a time
@@ -1927,7 +1927,7 @@
}
$_[0] = join('',$before,"\{",$after) if($change);
# MRO: mark one opening brace
- if($_[0] =~ s/^([^{]*){/push(@processedB,$1);join('',$O,++$id,$C)/eos) {
+ if($_[0] =~ s/^([^{]*)\{/push(@processedB,$1);join('',$O,++$id,$C)/eos) {
$before=''; $after=$';
}
if ($after =~ /\}/) {
@@ -2028,7 +2028,7 @@
if ($xafter =~ /noexpand/) { $before .= "\\$funct"; next; }
s/^[\s%]*(.)/$delim=$1;''/eo;
- if ($delim =~ /{/ ) {
+ if ($delim =~ /\{/ ) {
# brackets not yet numbered...
# $before .= $funct . $delim;
push(@case_processed, $funct . $delim);
@@ -5861,7 +5861,7 @@
$this_cmd = join(''
, "command{\\$cmd}"
, ($argn ? "[$argn]" :'')
- , (($opt =~ /^}$/) ? '' : "[$opt]" )
+ , (($opt =~ /^\}$/) ? '' : "[$opt]" )
, "{", $body , "}" );
$this_cmd = &revert_to_raw_tex($this_cmd);
if ($renewed) {
@@ -5876,7 +5876,7 @@
# local($this_cmd) = join(''
# , "\n\\renewcommand{\\$cmd}"
# , ($argn ? "[$argn]" :'')
-# , (($opt =~ /^}$/) ? '' : "[$opt]" )
+# , (($opt =~ /^\}$/) ? '' : "[$opt]" )
# , "{", $body , "}\n" );
# $latex_body .= &revert_to_raw_tex($this_cmd);
$latex_body .= "\n\\renew". $this_cmd."\n";
@@ -6876,7 +6876,7 @@
push(@preamble, $next_def );
}
else {
- ($name) = $next_def =~ /$marker\s*({[^}]+})/; # matches type{name}
+ ($name) = $next_def =~ /$marker\s*(\{[^}]+\})/; # matches type{name}
$name = &escape_rx_chars($name);
# $preamble .= $next_def . "\n" unless ($preamble =~ /$marker\s*$name/);
push(@preamble, $name );
@@ -6931,7 +6931,7 @@
# \usepackage is invalid in LaTeX 2.09 and LaTeX-2e compatibility mode
$LATEX_COLOR = ''; $LOAD_LATEX_COLOR = '';
# ... so is \providecommand
- $preamble =~ s/\\documentstyle[^{]*{[^}]*}\n?/
+ $preamble =~ s/\\documentstyle[^{]*\{[^}]*\}\n?/
$&."\n\\let\\providecommand\\newcommand\n"/eo;
}
@@ -7378,7 +7378,7 @@
sub encode {
local($_) = @_;
# Remove invocation-specific stuff
- 1 while(s/\\(begin|end)\s*(($O|$OP)\d+($C|$CP))?|{?tex2html_(wrap|nowrap|deferred|)(_\w+)?}?(\2)?//go);
+ 1 while(s/\\(begin|end)\s*(($O|$OP)\d+($C|$CP))?|\{?tex2html_(wrap|nowrap|deferred|)(_\w+)?\}?(\2)?//go);
$_ = &revert_to_raw_tex($_);
s/\\protect//g; # remove redundant \protect macros
#$_ = pack("u*", $_); # uuencode
@@ -9648,15 +9648,15 @@
s/(\\\w+)?$tex2html_wrap_rx([^\\\n])?/$tmp=$2;
((($tmp eq 'end')&&($1)&&!($5)&&($6))? "$1 $6":"$1$5$6")/egs;
undef $tmp;
- s/\s*\\newedcommand\s*{/"%\n\\providecommand{\\"/gem;
- s/\\newedcommand\s*{/\\providecommand{\\/gom;
-# s/(\n*)\\renewedcommand{/($1? "\n":'')."\\renewcommand{\\"/geo;
- s/\s*\\providedcommand\s*{/"%\n\\providecommand{\\"/gem;
-# s/\\providedcommand{/\\providecommand{\\/go;
+ s/\s*\\newedcommand\s*\{/"%\n\\providecommand{\\"/gem;
+ s/\\newedcommand\s*\{/\\providecommand{\\/gom;
+# s/(\n*)\\renewedcommand\{/($1? "\n":'')."\\renewcommand{\\"/geo;
+ s/\s*\\providedcommand\s*\{/"%\n\\providecommand{\\"/gem;
+# s/\\providedcommand\{/\\providecommand{\\/go;
s/\\renewedenvironment\s*/\\renewenvironment/gom;
- s/\\newedboolean\s*{/\\newboolean{/gom;
- s/\\newedcounter\s*{/\\newcounter{/gom;
- s/\\newedtheorem\s*{/\\newtheorem{/gom;
+ s/\\newedboolean\s*\{/\\newboolean{/gom;
+ s/\\newedcounter\s*\{/\\newcounter{/gom;
+ s/\\newedtheorem\s*\{/\\newtheorem{/gom;
s/\\xystar/\\xy\*/gom; # the * has a special meaning in Xy-pic
#fix-up the star'd environment names
@@ -14391,7 +14391,7 @@
sub do_body_newcounter {
local($ctr) = @_;
$latex_body .= &revert_to_raw_tex("\\newcounter{$ctr}\n")
- unless ($preamble =~ /\\new(counter|theorem){$ctr}/);
+ unless ($preamble =~ /\\new(counter|theorem)\{$ctr\}/);
$global{$ctr} = 0;
&process_commands_wrap_deferred("the$ctr ");
$_;
@@ -16590,7 +16590,7 @@
# used for labels in {enumerate} environments
$standard_label_rx =
"\\s*[[]\\s*((($any_next_pair_rx4)|([[][^]]*[]])|[^]])*)[]]";
- $enum_label_rx = "^((({[^{}]*})|([^{}]))*)([aAiI1])(.*)";
+ $enum_label_rx = "^(((\{[^{}]*\})|([^{}]))*)([aAiI1])(.*)";
$enum_level = 0; # level for enumerate (1-4, i-iv)
@@ -16863,7 +16863,7 @@
sub make_order_sensitive_rx {
local(@theorem_alts, $theorem_alts);
- @theorem_alts = ($preamble =~ /\\newtheorem\s*{([^\s}]+)}/og);
+ @theorem_alts = ($preamble =~ /\\newtheorem\s*\{([^\s}]+)\}/og);
$theorem_alts = join('|',@theorem_alts);
#
# HWS: Added kludge to require counters to be more than 2 characters long
diff -uN latex2html-2018/styles/alltt.perl.ORG latex2html-2018/styles/alltt.perl
--- latex2html-2018/styles/alltt.perl.ORG 2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/styles/alltt.perl 2018-07-24 17:11:23.446005000 +0900
@@ -51,11 +51,11 @@
local ($alltt_begin) = "<alltt_begin>";
local ($alltt_end) = "<alltt_end>";
local($saveRS) = $/; undef $/;
- while (/\\begin\s*{($alltt_rx)}([ \t]*\n)?/m) {
+ while (/\\begin\s*\{($alltt_rx)\}([ \t]*\n)?/m) {
$alltt_env = $1;
$alltt = "";
($before, $after) = ($`, $');
- if ($after =~ /\\end\s*{($alltt_rx)}/sm) {
+ if ($after =~ /\\end\s*\{($alltt_rx)\}/sm) {
($alltt, $after) = ($`, $');
local(@check) = split("\n",$before);
local($lastline) = pop @check unless ($before =~ s/\n$//sm);
diff -uN latex2html-2018/styles/graphics-support.perl.ORG latex2html-2018/styles/graphics-support.perl
--- latex2html-2018/styles/graphics-support.perl.ORG 2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/styles/graphics-support.perl 2018-07-24 17:11:23.450316000 +0900
@@ -172,21 +172,21 @@
#RRM: may only work correctly for Unix
# $dd holds the directory-delimiter, usually /
- $paths =~ s/\s*({|})\s*/$1/g;
- local(@paths) = split (/}/, $paths);
+ $paths =~ s/\s*(\{|\})\s*/$1/g;
+ local(@paths) = split (/\}/, $paths);
if ($DESTDIR eq $FILE) {
# given paths are relative to parent directory
- map(s|^{([^/~\.\$\\][^}]*)|{..$dd$1|, @paths);
- map(s/^{\.\Q$dd\E/{\.\.$dd/, @paths);
+ map(s|^\{([^/~\.\$\\][^}]*)|{..$dd$1|, @paths);
+ map(s/^\{\.\Q$dd\E/{\.\.$dd/, @paths);
} elsif ($DESTDIR eq '.') {
# paths are already relative to working directory
} else {
# specify full paths, by prepending source directory
- map(s|^{([^/~\.\$\\][^}]*)|{$orig_cwd$dd$1|, @paths);
- map(s/^{\.\Q$dd\E/{$orig_cwd$dd/, @paths);
+ map(s|^\{([^/~\.\$\\][^}]*)|{$orig_cwd$dd$1|, @paths);
+ map(s/^\{\.\Q$dd\E/{$orig_cwd$dd/, @paths);
}
$paths = join('}', @paths).'}';
- map(s/^{//,@paths); # Strip leading { and trailing $dd
+ map(s/^\{//,@paths); # Strip leading { and trailing $dd
map(s/\Q$dd\E$//,@paths);
$GRAPHICS_PATH = [@$GRAPHICS_PATH,@paths];
diff -uN latex2html-2018/versions/html3_1.pl.ORG latex2html-2018/versions/html3_1.pl
--- latex2html-2018/versions/html3_1.pl.ORG 2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/versions/html3_1.pl 2018-07-24 17:11:23.457697000 +0900
@@ -326,8 +326,8 @@
# Inside <MATH>, { and } have special meaning. Thus, need &lcub;
# and &rcub;
# s/{/&lcub;/g; s/}/&rcub;/g; # Where are these defined ?
- s/{/&#123;/g;
- s/}/&#125;/g;
+ s/\{/&#123;/g;
+ s/\}/&#125;/g;
# Remove the safety markers for math-entities
s/(\&\w+)#\w+;/$1;/g;
@@ -1579,7 +1579,7 @@
local($extra) = &get_supsub;
# contents of $extra may require an image !!
# revert the brace-pairs
- if ($extra =~ /{|}/) {
+ if ($extra =~ /\{|\}/) {
&mark_string($extra);
$extra =~ s/$O(\d+)$C/$OP$1$CP/g;
}
diff -uN latex2html-2018/versions/math.pl.ORG latex2html-2018/versions/math.pl
--- latex2html-2018/versions/math.pl.ORG 2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/versions/math.pl 2018-07-24 17:11:23.466156000 +0900
@@ -121,7 +121,7 @@
local($saved) = $_;
# s/(^\s*(\$|\\\()\s*|\s*(\$|\\\))\s*$)//g; # remove the \$ signs or \(..\)
# s/^\\ensuremath(($O|$OP)\d+($C|$CP))(.*)\1/$4/; # remove an ensuremath wrapper
- if (s/^$math_start_rx|$math_end_rx$//gs ) {}
+ if (s/^$math_start_rx|${math_end_rx}$//gs ) {}
elsif (s/^\\ensuremath(($O|$OP)\d+($C|$CP))(.*)\1/$4/){} # remove an ensuremath wrapper
else { $failed = 1 }
s/\\(begin|end)(($O|$OP)\d+($C|$CP))tex2html_wrap\w*\2//g; # remove wrappers
@@ -370,9 +370,9 @@
} else {
# Inside <MATH>, { and } have special meaning. Thus, need &lcub;
# and &rcub;
-# s/{/&lcub;/g; s/}/&rcub;/g; # Where are these defined ?
- s/{/&#123;/g;
- s/}/&#125;/g;
+# s/\{/&lcub;/g; s/}/&rcub;/g; # Where are these defined ?
+ s/\{/&#123;/g;
+ s/\}/&#125;/g;
# Remove the safety markers for math-entities
s/(\&\w+)#\w+;/$1;/g;
@@ -1657,7 +1657,7 @@
local($extra) = &get_supsub;
# contents of $extra may require an image !!
# revert the brace-pairs
- if ($extra =~ /{|}/) {
+ if ($extra =~ /\{|\}/) {
&mark_string($extra);
$extra =~ s/$O(\d+)$C/$OP$1$CP/g;
}
diff -uN latex2html-2018/styles/frames.perl.ORG latex2html-2018/styles/frames.perl
--- latex2html-2018/styles/frames.perl.ORG 2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/styles/frames.perl 2018-07-24 17:11:28.606392000 +0900
@@ -706,7 +706,7 @@
$_[0] =~ s/<(META NAME|LINK)[^>]*>\s*//g;
$_[0] =~ s/$more_links_mark/$NO_ROBOTS\n$LATEX2HTML_META/g;
local($savedRS)=$/; $/ = '';
- $_[0] =~ s/\n\{2;}/\n/sg;
+ $_[0] =~ s/\n\{2,}/\n/sg;
$_[0] =~ s/\s$//s;
$_[0] =~ s!\s*(\n</HEAD>\n)\s*!$1!s;
$/ = $savedRS;
----- To here -----
+========================================================+
Shigeharu TAKENO Niigata Institute of Technology
Kashiwazaki, Niigata 945-1195 JAPAN
shige at iee.niit.ac.jp TEL(&FAX): +81-257-22-8161
+========================================================+
More information about the latex2html
mailing list