[l2h] Escape braces in regex

Shigeharu TAKENO shige at iee.niit.ac.jp
Tue Jul 24 10:20:18 CEST 2018


shige 07/24 2018
----------------

Recent versions of perl (especially perl 5.26) may warn "Unescaped left
brace in regex ...". So I made a patch for latex2html-2018 that escapes
the braces in the regexes that can trigger this warning
(latex2html.pin, styles/alltt.perl, styles/graphics-support.perl,
versions/html3_1.pl, versions/math.pl), and that also fixes a bug in
a regex in styles/frames.perl (a ";" typed in place of ",").

----- From here -----
diff -uN latex2html-2018/latex2html.pin.ORG latex2html-2018/latex2html.pin
--- latex2html-2018/latex2html.pin.ORG	2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/latex2html.pin	2018-07-24 17:11:23.441258000 +0900
@@ -1406,8 +1406,8 @@
     &replace_html_special_chars;
     # Remove fake environment which should be invisible to LaTeX2HTML.
     s/\001//m;
-    s/[%]end\s*{latexonly}/\001/gom;
-    s/[%]begin\s*{latexonly}([^\001]*)\001/%/gos;
+    s/[%]end\s*\{latexonly\}/\001/gom;
+    s/[%]begin\s*\{latexonly\}([^\001]*)\001/%/gos;
     s/\001//m;
 
     &preprocess_alltt if defined(&preprocess_alltt);
@@ -1431,13 +1431,13 @@
 	&write_mydb("verbatim", $global{'verbatim_counter'}, $3);
 	"$1$comment_mark".$global{'verbatim_counter'}."\n"/mge;
     # Remove the htmlonly-environment
-    s/\\begin\s*{htmlonly}\s*\n?//gom;
-    s/\\end\s*{htmlonly}\s*\n?//gom;
+    s/\\begin\s*\{htmlonly\}\s*\n?//gom;
+    s/\\end\s*\{htmlonly\}\s*\n?//gom;
     # Remove enviroments which should be invisible to LaTeX2HTML.
-    s/\n[^%\n]*\\end\s*{latexonly}\s*\n?/\001/gom;
-    s/((^|\n)[^%\n]*)\\begin\s*{latexonly}([^\001]*)\001/$1/gom;
-    s/\\end\s*{comment}\s*\n?/\001/gom;
-    s/\\begin\s*{comment}([^\001]*)\001//gom;
+    s/\n[^%\n]*\\end\s*\{latexonly\}\s*\n?/\001/gom;
+    s/((^|\n)[^%\n]*)\\begin\s*\{latexonly\}([^\001]*)\001/$1/gom;
+    s/\\end\s*\{comment\}\s*\n?/\001/gom;
+    s/\\begin\s*\{comment\}([^\001]*)\001//gom;
 
     # this used to be earlier, but that can create problems with comments
     &wrap_other_environments if (%other_environments);
@@ -1905,14 +1905,14 @@
     # MRO: removed deprecated $*, replaced by option /m
     $_[0] =~ s/(^|[^\\])\\\{/$1tex2html_escaped_opening_bracket/gom;
     $_[0] =~ s/(^|[^\\])\\\{/$1tex2html_escaped_opening_bracket/gom; # repeat this
-    $_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom;
-    $_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom; # repeat this
+    $_[0] =~ s/(^|[^\\])\\\}/$1tex2html_escaped_closing_bracket/gom;
+    $_[0] =~ s/(^|[^\\])\\\}/$1tex2html_escaped_closing_bracket/gom; # repeat this
     my $id = $global{'max_id'};
     my $prev_id = $id;
     # mark all balanced braces
     # MRO: This should in fact mark all of them as the hierarchy is
     # processed inside-out.
-    1 while($_[0] =~ s/{([^{}]*)}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo);
+    1 while($_[0] =~ s/\{([^{}]*)\}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo);
     # What follows seems esoteric...
     my @processedB = ();
     # Take one opening brace at a time
@@ -1927,7 +1927,7 @@
         }
         $_[0] = join('',$before,"\{",$after) if($change);
         # MRO: mark one opening brace
-	if($_[0] =~ s/^([^{]*){/push(@processedB,$1);join('',$O,++$id,$C)/eos) {
+	if($_[0] =~ s/^([^{]*)\{/push(@processedB,$1);join('',$O,++$id,$C)/eos) {
 	    $before=''; $after=$';
         }
         if ($after =~ /\}/) { 
@@ -2028,7 +2028,7 @@
 	if ($xafter =~ /noexpand/) { $before .= "\\$funct"; next; }
 
 	s/^[\s%]*(.)/$delim=$1;''/eo;
-	if ($delim =~ /{/ ) {
+	if ($delim =~ /\{/ ) {
             # brackets not yet numbered...
 #	    $before .= $funct . $delim;
 	    push(@case_processed, $funct . $delim);
@@ -5861,7 +5861,7 @@
     $this_cmd = join(''
 	, "command{\\$cmd}"
 	, ($argn ? "[$argn]" :'') 
-	, (($opt =~ /^}$/) ? '' : "[$opt]" )
+	, (($opt =~ /^\}$/) ? '' : "[$opt]" )
 	, "{", $body , "}" );
     $this_cmd = &revert_to_raw_tex($this_cmd);
     if ($renewed) {
@@ -5876,7 +5876,7 @@
 #	    local($this_cmd) = join(''
 #		, "\n\\renewcommand{\\$cmd}"
 #		, ($argn ? "[$argn]" :'') 
-#		, (($opt =~ /^}$/) ? '' : "[$opt]" )
+#		, (($opt =~ /^\}$/) ? '' : "[$opt]" )
 #		, "{", $body , "}\n" );
 #	    $latex_body .= &revert_to_raw_tex($this_cmd);
 	    $latex_body .= "\n\\renew". $this_cmd."\n";
@@ -6876,7 +6876,7 @@
 	push(@preamble, $next_def ); 
     }
     else {
-	($name) = $next_def =~ /$marker\s*({[^}]+})/; # matches type{name}
+	($name) = $next_def =~ /$marker\s*(\{[^}]+\})/; # matches type{name}
 	$name = &escape_rx_chars($name);
 #	$preamble .= $next_def . "\n" unless ($preamble =~ /$marker\s*$name/);
 	push(@preamble, $name ); 
@@ -6931,7 +6931,7 @@
 	# \usepackage is invalid in LaTeX 2.09 and LaTeX-2e compatibility mode
 	$LATEX_COLOR = ''; $LOAD_LATEX_COLOR = '';
 	# ... so is \providecommand 
-	$preamble =~ s/\\documentstyle[^{]*{[^}]*}\n?/
+	$preamble =~ s/\\documentstyle[^{]*\{[^}]*\}\n?/
 		$&."\n\\let\\providecommand\\newcommand\n"/eo;
     }
 
@@ -7378,7 +7378,7 @@
 sub encode {
     local($_) = @_;
     # Remove invocation-specific stuff
-    1 while(s/\\(begin|end)\s*(($O|$OP)\d+($C|$CP))?|{?tex2html_(wrap|nowrap|deferred|)(_\w+)?}?(\2)?//go);
+    1 while(s/\\(begin|end)\s*(($O|$OP)\d+($C|$CP))?|\{?tex2html_(wrap|nowrap|deferred|)(_\w+)?\}?(\2)?//go);
     $_ = &revert_to_raw_tex($_);
     s/\\protect//g;		# remove redundant \protect macros
     #$_ = pack("u*", $_);	# uuencode
@@ -9648,15 +9648,15 @@
     s/(\\\w+)?$tex2html_wrap_rx([^\\\n])?/$tmp=$2;
         ((($tmp eq 'end')&&($1)&&!($5)&&($6))? "$1 $6":"$1$5$6")/egs;
     undef $tmp;
-    s/\s*\\newedcommand\s*{/"%\n\\providecommand{\\"/gem;
-    s/\\newedcommand\s*{/\\providecommand{\\/gom;
-#    s/(\n*)\\renewedcommand{/($1? "\n":'')."\\renewcommand{\\"/geo;
-    s/\s*\\providedcommand\s*{/"%\n\\providecommand{\\"/gem;
-#    s/\\providedcommand{/\\providecommand{\\/go;
+    s/\s*\\newedcommand\s*\{/"%\n\\providecommand{\\"/gem;
+    s/\\newedcommand\s*\{/\\providecommand{\\/gom;
+#    s/(\n*)\\renewedcommand\{/($1? "\n":'')."\\renewcommand{\\"/geo;
+    s/\s*\\providedcommand\s*\{/"%\n\\providecommand{\\"/gem;
+#    s/\\providedcommand\{/\\providecommand{\\/go;
     s/\\renewedenvironment\s*/\\renewenvironment/gom;
-    s/\\newedboolean\s*{/\\newboolean{/gom;
-    s/\\newedcounter\s*{/\\newcounter{/gom;
-    s/\\newedtheorem\s*{/\\newtheorem{/gom;
+    s/\\newedboolean\s*\{/\\newboolean{/gom;
+    s/\\newedcounter\s*\{/\\newcounter{/gom;
+    s/\\newedtheorem\s*\{/\\newtheorem{/gom;
     s/\\xystar/\\xy\*/gom; # the * has a special meaning in Xy-pic
 
     #fix-up the star'd environment names
@@ -14391,7 +14391,7 @@
 sub do_body_newcounter {
     local($ctr) = @_;
     $latex_body .= &revert_to_raw_tex("\\newcounter{$ctr}\n")
-	unless ($preamble =~ /\\new(counter|theorem){$ctr}/);
+	unless ($preamble =~ /\\new(counter|theorem)\{$ctr\}/);
     $global{$ctr} = 0;
     &process_commands_wrap_deferred("the$ctr ");
     $_;
@@ -16590,7 +16590,7 @@
     # used for labels in {enumerate} environments
     $standard_label_rx = 
 	"\\s*[[]\\s*((($any_next_pair_rx4)|([[][^]]*[]])|[^]])*)[]]";
-    $enum_label_rx = "^((({[^{}]*})|([^{}]))*)([aAiI1])(.*)";
+    $enum_label_rx = "^(((\{[^{}]*\})|([^{}]))*)([aAiI1])(.*)";
     $enum_level = 0;	# level for enumerate (1-4, i-iv)
 
 
@@ -16863,7 +16863,7 @@
 
 sub make_order_sensitive_rx {
     local(@theorem_alts, $theorem_alts);
-    @theorem_alts = ($preamble =~ /\\newtheorem\s*{([^\s}]+)}/og);
+    @theorem_alts = ($preamble =~ /\\newtheorem\s*\{([^\s}]+)\}/og);
     $theorem_alts = join('|',@theorem_alts);
 #
 #  HWS: Added kludge to require counters to be more than 2 characters long
diff -uN latex2html-2018/styles/alltt.perl.ORG latex2html-2018/styles/alltt.perl
--- latex2html-2018/styles/alltt.perl.ORG	2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/styles/alltt.perl	2018-07-24 17:11:23.446005000 +0900
@@ -51,11 +51,11 @@
     local ($alltt_begin) = "<alltt_begin>";
     local ($alltt_end) = "<alltt_end>";
     local($saveRS) = $/; undef $/;
-    while (/\\begin\s*{($alltt_rx)}([ \t]*\n)?/m) {
+    while (/\\begin\s*\{($alltt_rx)\}([ \t]*\n)?/m) {
 	$alltt_env = $1;
 	$alltt = "";
 	($before, $after) = ($`, $');
-	if ($after =~ /\\end\s*{($alltt_rx)}/sm) {
+	if ($after =~ /\\end\s*\{($alltt_rx)\}/sm) {
 	    ($alltt, $after) = ($`, $');
 	    local(@check) = split("\n",$before);
 	    local($lastline) = pop @check unless ($before =~ s/\n$//sm);
diff -uN latex2html-2018/styles/graphics-support.perl.ORG latex2html-2018/styles/graphics-support.perl
--- latex2html-2018/styles/graphics-support.perl.ORG	2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/styles/graphics-support.perl	2018-07-24 17:11:23.450316000 +0900
@@ -172,21 +172,21 @@
 
     #RRM: may only work correctly for Unix    
     # $dd  holds the directory-delimiter, usually / 
-    $paths =~ s/\s*({|})\s*/$1/g;
-    local(@paths) = split (/}/, $paths);
+    $paths =~ s/\s*(\{|\})\s*/$1/g;
+    local(@paths) = split (/\}/, $paths);
     if ($DESTDIR eq $FILE) {
 	# given paths are relative to parent directory
-	map(s|^{([^/~\.\$\\][^}]*)|{..$dd$1|, @paths);
-	map(s/^{\.\Q$dd\E/{\.\.$dd/, @paths);
+	map(s|^\{([^/~\.\$\\][^}]*)|{..$dd$1|, @paths);
+	map(s/^\{\.\Q$dd\E/{\.\.$dd/, @paths);
     } elsif ($DESTDIR eq '.') {
 	# paths are already relative to working directory
     } else { 
 	# specify full paths, by prepending source directory
-	map(s|^{([^/~\.\$\\][^}]*)|{$orig_cwd$dd$1|, @paths);
-	map(s/^{\.\Q$dd\E/{$orig_cwd$dd/, @paths);
+	map(s|^\{([^/~\.\$\\][^}]*)|{$orig_cwd$dd$1|, @paths);
+	map(s/^\{\.\Q$dd\E/{$orig_cwd$dd/, @paths);
     }
     $paths = join('}', @paths).'}';
-    map(s/^{//,@paths);		# Strip leading { and trailing $dd
+    map(s/^\{//,@paths);		# Strip leading { and trailing $dd
     map(s/\Q$dd\E$//,@paths);
     $GRAPHICS_PATH = [@$GRAPHICS_PATH,@paths];
 
diff -uN latex2html-2018/versions/html3_1.pl.ORG latex2html-2018/versions/html3_1.pl
--- latex2html-2018/versions/html3_1.pl.ORG	2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/versions/html3_1.pl	2018-07-24 17:11:23.457697000 +0900
@@ -326,8 +326,8 @@
 	# Inside <MATH>, { and } have special meaning. Thus, need &lcub;
 	# and &rcub;
 #    s/{/&lcub;/g; s/}/&rcub;/g; # Where are these defined ?
-	s/{/&#123;/g;
-	s/}/&#125;/g;
+	s/\{/&#123;/g;
+	s/\}/&#125;/g;
 
 	# Remove the safety markers for math-entities
 	s/(\&\w+)#\w+;/$1;/g; 
@@ -1579,7 +1579,7 @@
 		local($extra) = &get_supsub;
 # contents of $extra may require an image !!
 		# revert the brace-pairs
-		if ($extra =~ /{|}/) { 
+		if ($extra =~ /\{|\}/) { 
 		    &mark_string($extra);
 		    $extra =~ s/$O(\d+)$C/$OP$1$CP/g;
 		}
diff -uN latex2html-2018/versions/math.pl.ORG latex2html-2018/versions/math.pl
--- latex2html-2018/versions/math.pl.ORG	2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/versions/math.pl	2018-07-24 17:11:23.466156000 +0900
@@ -121,7 +121,7 @@
     local($saved) = $_;
 #   s/(^\s*(\$|\\\()\s*|\s*(\$|\\\))\s*$)//g; # remove the \$ signs or \(..\)
 #   s/^\\ensuremath(($O|$OP)\d+($C|$CP))(.*)\1/$4/; # remove an ensuremath wrapper
-    if (s/^$math_start_rx|$math_end_rx$//gs ) {}
+    if (s/^$math_start_rx|${math_end_rx}$//gs ) {}
     elsif (s/^\\ensuremath(($O|$OP)\d+($C|$CP))(.*)\1/$4/){} # remove an ensuremath wrapper
     else { $failed = 1 }
     s/\\(begin|end)(($O|$OP)\d+($C|$CP))tex2html_wrap\w*\2//g; # remove wrappers
@@ -370,9 +370,9 @@
     } else { 
 	# Inside <MATH>, { and } have special meaning. Thus, need &lcub;
 	# and &rcub;
-#    s/{/&lcub;/g; s/}/&rcub;/g; # Where are these defined ?
-	s/{/&#123;/g;
-	s/}/&#125;/g;
+#    s/\{/&lcub;/g; s/}/&rcub;/g; # Where are these defined ?
+	s/\{/&#123;/g;
+	s/\}/&#125;/g;
 
 	# Remove the safety markers for math-entities
 	s/(\&\w+)#\w+;/$1;/g; 
@@ -1657,7 +1657,7 @@
 		local($extra) = &get_supsub;
 # contents of $extra may require an image !!
 		# revert the brace-pairs
-		if ($extra =~ /{|}/) { 
+		if ($extra =~ /\{|\}/) { 
 		    &mark_string($extra);
 		    $extra =~ s/$O(\d+)$C/$OP$1$CP/g;
 		}
diff -uN latex2html-2018/styles/frames.perl.ORG latex2html-2018/styles/frames.perl
--- latex2html-2018/styles/frames.perl.ORG	2018-02-02 01:52:36.000000000 +0900
+++ latex2html-2018/styles/frames.perl	2018-07-24 17:11:28.606392000 +0900
@@ -706,7 +706,7 @@
     $_[0] =~ s/<(META NAME|LINK)[^>]*>\s*//g;
     $_[0] =~ s/$more_links_mark/$NO_ROBOTS\n$LATEX2HTML_META/g;
     local($savedRS)=$/; $/ = '';
-    $_[0] =~ s/\n\{2;}/\n/sg;
+    $_[0] =~ s/\n\{2,}/\n/sg;
     $_[0] =~ s/\s$//s;
     $_[0] =~ s!\s*(\n</HEAD>\n)\s*!$1!s;
     $/ = $savedRS;
----- To here -----

+========================================================+
 Shigeharu TAKENO     NIigata Institute of Technology
                       kashiwazaki,Niigata 945-1195 JAPAN
 shige at iee.niit.ac.jp   TEL(&FAX): +81-257-22-8161
+========================================================+


More information about the latex2html mailing list