texlive[45338] trunk: texcount (19sep17)

commits+karl at tug.org commits+karl at tug.org
Wed Sep 20 00:01:25 CEST 2017


Revision: 45338
          http://tug.org/svn/texlive?view=revision&revision=45338
Author:   karl
Date:     2017-09-20 00:01:25 +0200 (Wed, 20 Sep 2017)
Log Message:
-----------
texcount (19sep17)

Modified Paths:
--------------
    trunk/Build/source/texk/texlive/linked_scripts/texcount/texcount.pl
    trunk/Master/texmf-dist/scripts/texcount/texcount.pl

Added Paths:
-----------
    trunk/Master/texmf-dist/doc/support/texcount/README.md
    trunk/Master/texmf-dist/doc/support/texcount/doc/
    trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.pdf
    trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.pdf
    trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.pdf
    trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/macros.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/sub_addrules.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/sub_options.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/sub_ruletypes.tex
    trunk/Master/texmf-dist/doc/support/texcount/doc/sub_tc_other.tex

Removed Paths:
-------------
    trunk/Master/texmf-dist/doc/support/texcount/QuickReference.pdf
    trunk/Master/texmf-dist/doc/support/texcount/QuickReference.tex
    trunk/Master/texmf-dist/doc/support/texcount/README
    trunk/Master/texmf-dist/doc/support/texcount/TeXcount.pdf
    trunk/Master/texmf-dist/doc/support/texcount/TeXcount.tex
    trunk/Master/texmf-dist/doc/support/texcount/TechDoc.pdf
    trunk/Master/texmf-dist/doc/support/texcount/TechDoc.tex
    trunk/Master/texmf-dist/doc/support/texcount/macros.tex
    trunk/Master/texmf-dist/doc/support/texcount/sub_addrules.tex
    trunk/Master/texmf-dist/doc/support/texcount/sub_options.tex
    trunk/Master/texmf-dist/doc/support/texcount/sub_tc_other.tex

Modified: trunk/Build/source/texk/texlive/linked_scripts/texcount/texcount.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/texcount/texcount.pl	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Build/source/texk/texlive/linked_scripts/texcount/texcount.pl	2017-09-19 22:01:25 UTC (rev 45338)
@@ -6,17 +6,40 @@
 use Text::Wrap;
 use Term::ANSIColor;
 
-BEGIN {
-  if ($^O=~/^MSWin/) {
+# System variables
+my $terminalwidth;
+
+# Conditional package inclusion
+if ($^O=~/^MSWin/) {
+  eval {
     require Win32::Console::ANSI;
-    Win32::Console::ANSI::->import();
+    import Win32::Console::ANSI;
+  };
+  if ($@) {
+    option_ansi_colours(0);
+    print STDERR "NOTE: Package Win32::Console::ANSI required for colour coded output.\n";
   }
 }
 
+# Terminal or not
+if (-t STDOUT) { # If in terminal
+  eval {
+    require Term::ReadKey;
+    import Term::ReadKey;
+    ($terminalwidth)=GetTerminalSize();
+  };
+} else {
+  option_ansi_colours(0);
+}
+
+if (!defined $terminalwidth) {$terminalwidth=76;}
+elsif ($terminalwidth<60) {$terminalwidth=60;}
+elsif ($terminalwidth>120) {$terminalwidth=120;}
+
 ##### Version information
 
-my $versionnumber="3.0";
-my $versiondate="2013 Jul 29";
+my $versionnumber="3.1";
+my $versiondate="2017 Sep 16";
 
 ###### Set global settings and variables
 
@@ -25,7 +48,7 @@
    ('versionnumber'  => $versionnumber
    ,'versiondate'    => $versiondate
    ,'maintainer'     => 'Einar Andreas Rodland'
-   ,'copyrightyears' => '2008-2013'
+   ,'copyrightyears' => '2008-2017'
    ,'website'        => 'http://app.uio.no/ifi/texcount/'
    );
 
@@ -119,7 +142,7 @@
 my $_STDIN_='<STDIN>'; # File name to represent STDIN (must be '<...>'!)
 
 # CMD specific settings
-$Text::Wrap::columns=76; # Page width for wrapped output
+$Text::Wrap::columns=$terminalwidth; # Page width for wrapped output
 
 ###### Set state identifiers and methods
 
@@ -206,6 +229,7 @@
 my $STATE_EXCLUDE_STRONG=-20;
 my $STATE_EXCLUDE_STRONGER=-30;
 my $STATE_EXCLUDE_ALL=-40;
+my $STATE_SPECIAL_ARGUMENT=-90;
 my $STATE_PREAMBLE=-99;
 my $STATE_TEXT=1;
 my $STATE_TEXT_HEADER=2;
@@ -236,6 +260,7 @@
 add_keys_to_hash(\%key2state,$STATE_EXCLUDE_STRONG,-2,'xx');
 add_keys_to_hash(\%key2state,$STATE_EXCLUDE_STRONGER,-3,'xxx');
 add_keys_to_hash(\%key2state,$STATE_EXCLUDE_ALL,-4,'xall');
+add_keys_to_hash(\%key2state,$STATE_SPECIAL_ARGUMENT,'specarg','spescialarg','specialargument');
 add_keys_to_hash(\%key2state,$_STATE_OPTION,'[',' option',' opt',' optional');
 add_keys_to_hash(\%key2state,$_STATE_NOOPTION,'nooption','nooptions','noopt','noopts');
 add_keys_to_hash(\%key2state,$_STATE_AUTOOPTION,'autooption','autooptions','autoopt','autoopts');
@@ -245,6 +270,7 @@
     $STATE_EXCLUDE_ALL,
     $STATE_EXCLUDE_STRONGER,
     $STATE_EXCLUDE_STRONG,
+    $STATE_SPECIAL_ARGUMENT,
     $STATE_FLOAT,
     $STATE_MATH,
     $STATE_IGNORE,
@@ -281,6 +307,7 @@
     $STATE_EXCLUDE_STRONGER => 'stronger exclude: ignore environments and macro paramters',
     $STATE_EXCLUDE_ALL      => 'exlude all: even {, only scan for end marker',
     $STATE_PREAMBLE         => 'preamble: from \documentclass to \begin{document}',
+    $STATE_SPECIAL_ARGUMENT => 'special macro argument that TeXcount may process further',
     $STATE_TEXT             => 'text: count words',
     $STATE_TEXT_HEADER      => 'header text: count words as header words',
     $STATE_TEXT_FLOAT       => 'float text: count words as float words (e.g. captions)',
@@ -287,8 +314,18 @@
     $STATE_TO_HEADER        => 'header: count header, then count words as header words',
     $STATE_TO_FLOAT         => 'float: count float, then count words as float/other words',
     $STATE_TO_INLINEMATH    => 'inline math: count as inline math/equation',
-    $STATE_TO_DISPLAYMATH   => 'displayed math: count as displayed math/equation');
+    $STATE_TO_DISPLAYMATH   => 'displayed math: count as displayed math/equation',
+    $_STATE_OPTION          => 'rule for [] option follows',
+    $_STATE_NOOPTION        => 'no [] options allowed here',
+    $_STATE_AUTOOPTION      => 'automatic [] option gobbling');
 
+# Short state name for each state for use with -showstates
+my %state2key = ($STATE_PREAMBLE=>'pre');
+for my $key ('x','xx','xxx','xall','w','hw','ow','eq','ds',
+        'head','float','isfloat','ismath','specarg') {
+  $state2key{$key2state{$key}}=$key;
+}
+
 # Parsing state presentation style
 my %state2style=(
     $STATE_TEXT        => 'word',
@@ -330,6 +367,8 @@
 # TODO: Should do a conversion based on STATE values.
 sub state_to_text {
   my $st=shift @_;
+  my $statename = $state2key{$st};
+  if (defined $statename) {$st=$statename;}
   return $st;
 }
 
@@ -350,6 +389,7 @@
   push @countdesc,$desc;
   if (defined $sumweights[$like]) {$sumweights[$cnt]=$sumweights[$like];}
   $key2state{$key}=$state;
+  $state2key{$state}=$key;
   $state2cnt{$state}=$cnt;
   $state2style{$state}='altwd';
   push @STATE_MID_PRIORITY,$state;
@@ -378,18 +418,18 @@
 my $STYLE_EMPTY=' ';
 my $STYLE_BLOCK='-';
 my $NOSTYLE=' ';
-$STYLES{'Errors'}={'error'=>'bold red'};
+$STYLES{'Errors'}={'error'=>'bold red','note'=>'bold white'};
 $STYLES{'Words'}={'word'=>'blue','hword'=>'bold blue','oword'=>'blue','altwd'=>'blue'};
-$STYLES{'Macros'}={'cmd'=>'green','fileinc'=>'bold green'};
+$STYLES{'Macros'}={'cmd'=>'green','fileinc'=>'bold green','special'=>'bold red','specarg'=>'red'};
 $STYLES{'Options'}={'option'=>'yellow','optparm'=>'green'};
 $STYLES{'Ignored'}={'ignore'=>'cyan','math'=>'magenta'};
 $STYLES{'Excluded'}={'exclcmd'=>'yellow','exclenv'=>'yellow','exclmath'=>'yellow','mathcmd'=>'yellow'};
-$STYLES{'Groups'}={'document'=>'red','envir'=>'red','mathgroup'=>'magenta'};
+$STYLES{'Groups'}={'document'=>'bold red','envir'=>'red','mathgroup'=>'magenta'};
 $STYLES{'Comments'}={'tc'=>'bold yellow','comment'=>'yellow'};
 $STYLES{'Sums'}={'cumsum'=>'yellow'};
 $STYLES{'States'}={'state'=>'cyan underline'};
-$STYLES{'<core>'}={%{$STYLES{'Errors'}},$STYLE_EMPTY=>$NOSTYLE,'<printlevel>'=>1};
-$STYLES{0}={%{$STYLES{'Errors'}},'<printlevel>'=>0};
+$STYLES{'<core>'}={%{$STYLES{'Errors'}},$STYLE_EMPTY=>$NOSTYLE,'<printlevel>'=>1,'note'=>'bold white'};
+$STYLES{0}={%{$STYLES{'Errors'}},'<printlevel>'=>0,'note'=>'bold white'};
 $STYLES{1}={%{$STYLES{'<core>'}},%{$STYLES{'Words'}},%{$STYLES{'Groups'}},%{$STYLES{'Sums'}}};
 $STYLES{2}={%{$STYLES{1}},%{$STYLES{'Macros'}},%{$STYLES{'Ignored'}},%{$STYLES{'Excluded'}}};
 $STYLES{3}={%{$STYLES{2}},%{$STYLES{'Options'}},%{$STYLES{'Comments'}},'<printlevel>'=>2};
@@ -398,7 +438,8 @@
 my %STYLE=%{$STYLES{$defaultVerbosity}};
 
 my @STYLE_LIST=('error','word','hword','oword','altwd',
-  'ignore','document','cmd','exclcmd','option','optparm','envir','exclenv',
+  'ignore','document','special','cmd','exclcmd',
+  'option','optparm','envir','exclenv','specarg',
   'mathgroup','exclmath','math','mathcmd','comment','tc','fileinc','state','cumsum');
 my %STYLE_DESC=(
   'error'       => 'ERROR: TeXcount error message',
@@ -408,10 +449,12 @@
   'altwd'       => 'Words in user specified counters: counted in separate counters',
   'ignore'      => 'Ignored text or code: excluded or ignored',
   'document'    => '\documentclass: document start, beginning of preamble',
+  'special'     => 'Special macros, eg require special handling or have side-effects',
   'cmd'         => '\macro: macro not counted, but parameters may be',
   'exclcmd'     => '\macro: macro in excluded region',
   'option'      => '[Macro options]: not counted',
   'optparm'     => '[Optional parameter]: content parsed and styled as counted',
+  'specarg'     => 'Special argument, eg with side-effects',
   'envir'       => '\begin{name}  \end{name}: environment',
   'exclenv'     => '\begin{name}  \end{name}: environment in excluded region',
   'mathgroup'   => '$  $: counted as one equation',
@@ -434,7 +477,7 @@
    mu nu xi pi rho sigma tau upsilon phi chi psi omega
    Gamma Delta Theta Lambda Xi Pi Sigma Upsilon Phi Psi Omega 
    /;
-my $specialchars='\\\\('.join('|',@LetterMacros).')(\{\}|\s*|\b)';
+my $specialchars='\\\\('.join('|',@LetterMacros).')(\{\}|\s+|\b)';
 my $modifiedchars='\\\\[\'\"\`\~\^\=](@|\{@\})';
 my %NamedLetterPattern;
 $NamedLetterPattern{'restricted'}='@';
@@ -459,7 +502,7 @@
 # a macro.
 my %NamedMacroOptionPattern;
 $NamedMacroOptionPattern{'default'}='\[[^\[\]\n]*\]';
-$NamedMacroOptionPattern{'relaxed'}='\[[^\[\]\n]*(\n[^\[\]\n]+)\n?\]';
+$NamedMacroOptionPattern{'relaxed'}='\[\n?([^\[\]\n]\n?)*\]';
 $NamedMacroOptionPattern{'restricted'}='\[(\w|[,\-\s\~\.\:\;\+\?\*\_\=])*\]';
 my $MacroOptionPattern=$NamedMacroOptionPattern{'default'};
 
@@ -538,9 +581,12 @@
 ###### Define core rules
 
 ### Macros indicating package inclusion
-# Will always be assumed to take one parameter (plus options).
+# Will always be assumed to take one extra parameter which is the list of
+# packages. Macro handling rule indicates parameters ignored prior to that.
 # Gets added to TeXmacro. After that, values are not used, only membership.
-my %TeXpackageinc=('\usepackage'=>1,'\RequirePackage'=>1);
+# Handling is otherwise hard-coded rather than rule based.
+my %TeXpackageinc;
+add_keys_to_hash(\%TeXpackageinc,['[','ignore','specialargument'],'\usepackage','\RequirePackage');
 
 ### Macros that are counted within the preamble
 # The preamble is the text between \documentclass and \begin{document}.
@@ -609,7 +655,7 @@
     '\setlength','\addtolength','\settodepth','\settoheight','\settowidth','\setcounter',
     '\addtocontents','\addtocounter',
     '\fontsize');
-add_keys_to_hash(\%TeXmacro,3,'\multicolumn','\addcontentsline');
+add_keys_to_hash(\%TeXmacro,3,'\addcontentsline');
 add_keys_to_hash(\%TeXmacro,6,'\DeclareFontShape');
 add_keys_to_hash(\%TeXmacro,['[','text','ignore'],
     '\cite','\nocite','\citep','\citet','\citeauthor','\citeyear','\citeyearpar',
@@ -678,6 +724,7 @@
 
 ### Convert state keys to codes
 convert_hash(\%TeXpreamble,\&keyarray_to_state);
+convert_hash(\%TeXpackageinc,\&keyarray_to_state);
 convert_hash(\%TeXfloatinc,\&keyarray_to_state);
 convert_hash(\%TeXmacro,\&keyarray_to_state);
 convert_hash(\%TeXmacrocount,\&keyarray_to_cnt);
@@ -822,6 +869,7 @@
 ###### Main script
 
 
+
 ###################################################
 
 MAIN(@ARGV);
@@ -837,8 +885,8 @@
 # MAIN ROUTINE: Handle arguments, then parse files
 sub MAIN {
   my @args;
- push @args,@StartupOptions;
- push @args,@_;
+  push @args,@StartupOptions;
+  push @args,@_;
   Initialise();
   Check_Arguments(@args);
   my @toplevelfiles=Parse_Arguments(@args);
@@ -875,36 +923,46 @@
 # Check arguments, exit on exit condition
 sub Check_Arguments {
   my @args=@_;
-  my $arg=$args[0];
   if (!@args) {
     print_version();
-    print_short_help();
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))$/) {
     print_help();
     exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))=(.*)$/) {
-    print_help_on_rule($4);
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(opt|options?)$/) {
-    print_syntax();
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(opt|options?)=(.*)$/) {
-    print_syntax_subset($5);
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(styles?)$/) {
-    print_help_on_styles();
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(styles?)=(\w+)$/) {
-    print_help_on_styles($5);
-    exit;
-  } elsif ($arg=~/^--?(ver|version)$/) {
-    print_version();
-    exit;
-  } elsif ($arg=~/^--?(lic|license|licence)$/) {
-    print_license();
-    exit;
   }
+  for my $arg (@args) {
+    $arg=~s/^(--?(h|\?|help)|\/(\?|h))\b/-h/;
+    $arg=~s/[=:]/=/;
+    if ($arg=~/^-h$/) {
+      print_help();
+      exit;
+    } elsif ($arg=~/^-(h-)?(man|manual)$/) {
+      print_help_man();
+      exit;
+    } elsif ($arg=~/^-h-?(opt|options?)$/) {
+      print_help_options();
+      exit;
+    } elsif ($arg=~/^-h-?(opt|options?)=(.*)$/) {
+      print_help_options_subset($2);
+      exit;
+    } elsif ($arg=~/^-h(-rule)?=(.*)$/) {
+      print_help_on_rule($2);
+      exit;
+    } elsif ($arg=~/^-h-styles?$/) {
+      print_help_on_styles();
+      exit;
+    } elsif ($arg=~/^-h-styles?=(\w+)$/) {
+      print_help_on_styles($1);
+      exit;
+    } elsif ($arg=~/^-h-(tc|(tc)?inst(ructions?))?$/) {
+      print_help_tcinst();
+      exit;
+    } elsif ($arg=~/^--?(ver|version)$/) {
+      print_version();
+      exit;
+    } elsif ($arg=~/^--?(lic|license|licence)$/) {
+      print_license();
+      exit;
+    }
+  }
   return 1;
 }
 
@@ -913,12 +971,15 @@
   my @args=@_;
   my @files;
   foreach my $arg (@args) {
-    if (parse_option($arg)) {next;}
     if ($arg=~/^\-/) {
-      print "Invalid opton $arg \n\n";
-      print_short_help();
+      $arg=~s/[=:]/=/;
+      if (parse_option($arg)) {next;}
+      print "Invalid option $arg \n\n";
+      print_help();
       exit;
-    }
+    } elsif ($arg=~/^@\-/) { # ignored option
+      next;
+    } 
     $arg=~s/\\/\//g;
     push @files,$arg;
   }
@@ -1003,8 +1064,9 @@
   my $arg=shift @_;
   if (!defined $arg) {
     @sumweights=(0,1,1,1,0,0,1,1);
-  } elsif ($arg=~/^(\d+(\.\d*)?(,\d+(\.\d*)?){0,6})$/) {
-    @sumweights=(0,split(',',$1));
+  } elsif ($arg=~/^(\d+(\.\d*)?([,+]\d+(\.\d*)?){0,6})$/) {
+    @sumweights=(0,split(/[,+]/,$arg));
+    print STDERR "SUMWEIGHTS: ",join(', ',@sumweights),"\n";
   } else {
     print STDERR "Warning: Option value $arg not valid, ignoring option.\n";
   }
@@ -1039,7 +1101,11 @@
   elsif ($arg eq '-nover') {$showVersion=-1;}
   elsif ($arg =~/^-nosep(arator)?s?$/ ) {$separator='';}
   elsif ($arg =~/^-sep(arator)?s?=(.*)$/ ) {$separator=$2;}
-  elsif ($arg =~/^-out=(.*)/ ) {close STDOUT; open STDOUT,'>',$1;}
+  elsif ($arg =~/^-out=(.*)/ ) {
+    close STDOUT;
+    open STDOUT,'>',$1 or die "Could not open out file for writing: $1";
+  }
+  elsif ($arg =~/^-out-stderr/ ) {select STDERR;}
   else {return 0;}
   return 1;
 }
@@ -1099,10 +1165,11 @@
 # Parse option file TC options
 sub __optionfile_tc {
   my $arg=shift @_;
+  if ($arg=~/^\%\%/) {return 1;}
   $arg=~s/^\%\s*// || return 0;
   if ($arg=~/^subst\s+(\\\w+)\s+(.*)$/i) {
     $substitutions{$1}=$2;
-  } elsif ($arg=~/^(\w+)\s+([\\]*\w+)\s+([^\s\n]+)(\s+([0-9]+))?/i) {
+  } elsif ($arg=~/^(\w+)\s+([\\]*\w+)\s+([^\s\n]+)(\s+(\-?[0-9]+|\w+))?/i) {
     tc_macro_param_option($Main,$1,$2,$3,$5) || die "Invalid TC option: $arg\n";
   } else {
     print "Invalid TC option format: $arg\n";
@@ -1115,7 +1182,9 @@
 sub Parse_file_list {
   my @files=@_;
   my $listtotalcount=new_count('Total');
-  foreach (@files) {s/\\/\//g; s/ /\\ /g;}
+  foreach (@files) {
+    $_='"'.$_.'"'
+  }
   if (@files) {
     @files=<@files>; # For the sake of Windows: expand wildcards!
     for my $file (@files) {
@@ -1224,7 +1293,7 @@
   my $tex=shift @_;
   my $file=shift @_;
   foreach my $path (@_) {
-    if (!$path=~/[\\\/]$/) {$path.='/';}
+    if ($path && $path!~/[\\\/]$/) {$path.='/';}
     my $filepath=$path.$file;
     if (-e $filepath) {return $filepath;}
     elsif ($filepath=~/\.tex$/i) {}
@@ -1372,6 +1441,13 @@
     $countdesc[2]='Letters in headers';
     $countdesc[3]='Letters in captions';
     return 'letter';
+  } elsif ($language=~/^all-nonspace-(char|character|letter)s?$/) {
+    @WordPatterns=($NamedWordPattern{'letters'});
+    @AlphabetScripts=qw/Digit Is_alphabetic Is_punctuation/;
+    $countdesc[1]='Characters in text';
+    $countdesc[2]='Characters in headers';
+    $countdesc[3]='Characters in captions';
+    return 'nonspace-characters';
   } else {
     return undef;
   }
@@ -1919,6 +1995,36 @@
 ###### Error handling
 
 
+# Print note to output
+sub note {
+  my ($tex,$level,$text,$prefix,$style)=@_;
+  if ($printlevel>=$level) {
+    $prefix=(defined $prefix)?$prefix:'%NOTE: ';
+    $style=(defined $style)?$style:'note';
+    $text=count_in_template($tex->{'subcount'},$text);
+    flush_next($tex);
+    line_return(0,$tex);
+    print_style($prefix.$text,$style);
+    flush_next($tex);
+    $blankline=-1;    
+  }
+}
+
+# Compare count with expected and note if assertion fails
+sub assertion_note {
+  my ($tex,$checktext,$template)=@_;
+  my $count=$tex->{'subcount'};
+  my @check=split(/,/,$checktext);
+  for (my $i=scalar @check;$i>0;$i--) {
+    if ($check[$i-1] ne get_count($count,$i)) {
+      my $msg=$template.' [expected:'.join(',',@check).']';
+      note($tex,0,$msg,'%ASSERTION FAILED: ','error');
+      return 1;
+    }
+  }
+  return 0;
+}
+
 # Add warning to list of registered warnings (optionally to be reported at the end)
 sub warning {
   my ($tex,$text)=@_;
@@ -2012,11 +2118,12 @@
     $simple_token=1;
   }
   my $next;
+  my @specarg;
   while (defined ($next=next_token($tex,$simple_token))) {
     # Parse next token until token matches $end
     set_style($tex,'ignore');
     if ($state==$STATE_MATH) {set_style($tex,'math');}
-    if ((defined $end) && ($end eq $next)) {return;}
+    if ((defined $end) && ($end eq $next)) {return @specarg;}
     # Determine how token should be interpreted
     if ($state==$STATE_PREAMBLE && $next eq '\begin' && $tex->{'line'}=~/^\{\s*document\s*\}/) {
       # \begin{document}
@@ -2028,9 +2135,19 @@
     } elsif ($tex->{'type'}==$TOKEN_SPACE) {
       # space or other code that should be passed through without styling
       flush_next($tex,' ');
+    } elsif ($next eq '{') {
+      # {...} group
+      set_style($tex,'ignore');
+      push @specarg,_parse_unit($tex,$state,'}');
+      set_style($tex,'ignore');
+    } elsif ($next eq '}') {
+      error($tex,'Encountered } without corresponding {.');
     } elsif ($tex->{'type'}==$TOKEN_TC) {
       # parse TC instructions
       _parse_tc($tex,$next);
+    } elsif ($state==$STATE_SPECIAL_ARGUMENT) {
+      set_style($tex,'specarg');
+      push @specarg,$next;
     } elsif ($tex->{'type'}==$TOKEN_WORD) {
       # word
       if (my $cnt=state_text_cnt($state)) {
@@ -2038,13 +2155,6 @@
         inc_count($tex,$cnt);
         set_style($tex,state_to_style($state));
       }
-    } elsif ($next eq '{') {
-      # {...} group
-      set_style($tex,'ignore');
-      _parse_unit($tex,$state,'}');
-      set_style($tex,'ignore');
-    } elsif ($next eq '}') {
-      error($tex,'Encountered } without corresponding {.');
     } elsif ($state==$STATE_EXCLUDE_STRONGER) {
       # ignore remaining tokens
       set_style($tex,'ignore');
@@ -2053,7 +2163,7 @@
       set_style($tex,'document');
       _parse_documentclass_params($tex);
       while (!($tex->{'eof'})) {
-        _parse_unit($tex,$STATE_PREAMBLE);
+        push @specarg,_parse_unit($tex,$STATE_PREAMBLE);
       }
     } elsif ($tex->{'type'}==$TOKEN_MACRO) {
       # macro call
@@ -2072,15 +2182,16 @@
       # handle as parameter that should not be counted
       set_style($tex,'ignore');
     }
-    if (!defined $end) {return;}
+    if (!defined $end) {return @specarg;}
   }
   defined $end && error($tex,'Reached end of file while waiting for '.$end.'.');
+  return @specarg;
 }
 
 # Print state
 sub _set_printstate {
   my ($tex,$state,$end)=@_;
-  $tex->{'printstate'}=':'.state_to_text($state).':'.(defined $end?$end.':':'');
+  $tex->{'printstate'}=':'.state_to_text($state).(defined $end?'>'.$end:'').':';
   flush_next($tex);
 }
 
@@ -2102,6 +2213,7 @@
     next_subcount($tex,$label);
   }
   if ($state==$STATE_MATH) {set_style($tex,'mathcmd');}
+  elsif ($state==$STATE_SPECIAL_ARGUMENT) {set_style($tex,'specarg');}
   else {set_style($tex,state_is_text($state)?'cmd':'exclcmd');}
   if ($next eq '\begin' && state_inc_envir($state)) {
     _parse_envir($tex,$state);
@@ -2111,8 +2223,10 @@
     push @macro,$STRING_ERROR;
   } elsif ($next eq '\verb') {
     _parse_verb_region($tex,$state);
-  } elsif (state_is_parsed($state) && defined $TeXpackageinc{$next} ) {
-    _parse_include_package($tex);
+  } elsif (state_is_parsed($state) && defined (my $substat=$TeXpackageinc{$next})) {
+    # Parse macro parameters, use _parse_include_argument to process package list
+    set_style($tex,'document');
+  	push @macro,__gobble_macro_parms($tex,$substat,$__STATE_NULL,\&_parse_include_argument);
     push @macro,'<package>';
   } elsif (state_is_parsed($state) && defined (my $def=$TeXfileinclude{$next})) {
     # include file (merge in or queue up for parsing)
@@ -2183,7 +2297,7 @@
   } elsif ($instr eq 'insert') {
     $tex->{'line'}="\n".$next.$tex->{'line'};
   } elsif ($instr eq 'subst') {
-    if ($next=~/^(\\\S+)\s+(.*)$/) {
+    if ($next=~/^(\S+)\s*(\S.*)?$/) {
       my $from=$1;
       my $to=$2;
       $substitutions{$from}=$to;
@@ -2192,12 +2306,20 @@
       error($tex,'Invalid %TC:subst format.');
     }
   } elsif ($instr eq 'newcounter') {
-    assert($next=~s/^(\w+)(=(\w+))?\s*//,$tex,'Should have format %TC:newcounter {key}[={like-key}] {description}')
+    assert($next=~s/^(\w+)(=(\w+))?\s*//,$tex,'Expected format: %TC:newcounter {key}[={like-key}] {description}')
     || return;
     my $key=$1;
     my $like=$3;
     if ($next eq '') {$next=$key;}
     add_new_counter($key,$next,$like);
+  } elsif ($instr eq 'log') {
+    assert($next=~s/^(.*)$//,$tex,'Expected format: %TC:log {text or template}') || return;
+    note($tex,1,$1);
+  } elsif ($instr eq 'assert') {
+    assert($next=~s/^(\d+(,\d+)*)(\s+(.*))?$//,$tex,'Expected format: %TC:assert count+count+... {text or template}')
+    || return;
+    my $template=$4 || 'Words counted: {w} in text, {hw} in headers, {ow} other.';
+    assertion_note($tex,$1,$template);
   } elsif ($next=~/^([\\]*\S+)\s+([^\s]+)(\s+(-?\w+))?/) {
     # %TC:instr macro param option
     my $macro=$1;
@@ -2337,7 +2459,7 @@
       if ($param eq 'file') {$file=$2;}
       elsif ($param eq 'texfile') {
         $file=$2;
-        if (!$file=~/\.tex$/i) {$file.='.tex';}
+        if ($file!~/\.tex$/i) {$file.='.tex';}
       }
       else {$params{$param}=$2;}
     }
@@ -2364,7 +2486,6 @@
 sub _parse_include_package {
   my ($tex)=@_;
   set_style($tex,'document');
-  __gobble_option($tex);
   if ( $tex->{'line'}=~s/^\{(([\w\-]+)(\s*,\s*[\w\-]+)*)\}// ) {
     print_style("{$1}",'document');
     foreach (split(/\s*,\s*/,$1)) {
@@ -2377,6 +2498,17 @@
   }
 }
 
+# Extract package names from token list and include packages
+sub _parse_include_argument {
+  my $tex=shift @_;
+  my $args=join('',@_);
+  set_style($tex,'document');
+  foreach (split(/\s*,\s*/,$args)) {
+    $MacroUsage{"<package:$_>"}++;
+    include_package($_,$tex);
+  }
+}
+
 # Parse \documentclass parameters and include rules
 sub _parse_documentclass_params {
   my ($tex)=@_;
@@ -2445,7 +2577,7 @@
 
 # Gobble macro parameters as specified in parm plus options
 sub __gobble_macro_parms {
-  my ($tex,$parm,$oldstat)=@_;
+  my ($tex,$parm,$oldstat,$specarghandler)=@_;
   my $n;
   my @ret;
   if (ref($parm) eq 'ARRAY') {
@@ -2477,7 +2609,10 @@
       # Parse macro parameter
       if ($auto_gobble_options) {push @ret,__gobble_options($tex);}
       push @ret,$STRING_PARAMETER;
-      _parse_unit($tex,__new_state($p,$oldstat),$_PARAM_);
+      my @specarg=_parse_unit($tex,__new_state($p,$oldstat),$_PARAM_);
+      if ($p==$STATE_SPECIAL_ARGUMENT && defined $specarghandler) {
+        &$specarghandler($tex,@specarg);
+      }
     }
   }
   #TODO: Drop default gobbling of option at end?
@@ -2647,7 +2782,9 @@
   my $count=shift @_;
   my $sum=0;
   for (my $i=scalar(@sumweights);$i-->1;) {
-    $sum+=get_count($count,$i)*$sumweights[$i];
+    if ($sumweights[$i]) {
+      $sum+=get_count($count,$i)*$sumweights[$i];
+    }
   }
   return $sum;
 }
@@ -2928,6 +3065,7 @@
 # Print count summary for a count object
 sub print_count {
   my ($count,$class)=@_;
+  line_return(0);
   if ($htmlstyle) {print "<div class='".($class||'count')."'>\n";}  
   if ($outputtemplate) {
     _print_count_template($count,$outputtemplate);
@@ -3033,8 +3171,8 @@
   __print_count_using_template($count,$template);
 }
 
-# Print counts using template
-sub __print_count_using_template {
+# Return string with counts based on template
+sub count_in_template {
   my ($count,$template)=@_;
   while (my ($key,$cnt)=each %key2cnt) {
     $template=__process_template($template,$key,get_count($count,$cnt));
@@ -3046,9 +3184,15 @@
   $template=__process_template($template,'SUM',get_sum_count($count));
   $template=__process_template($template,'TITLE',$count->{'title'}||'');
   $template=__process_template($template,'SUB',number_of_subcounts($count));
-  print $template;
+  $template=~s/\a//gis;
+  return $template;
 }
 
+# Print counts using template
+sub __print_count_using_template {
+  print count_in_template(@_);
+}
+
 # Print subcounts using template
 sub __print_subcounts_using_template {
   my ($count,$template)=@_;
@@ -3070,7 +3214,7 @@
     $template=~s/\{($label)\?(.*?)\?(\1)\}//gis;
   }
   if (!defined $value) {$value='';}
-  $template=~s/\{($label)\}/$value/gis;
+  $template=~s/\{($label)\}/$value\a/gis;
   return $template;
 }
 
@@ -3114,6 +3258,7 @@
     } elsif ($tex->{'line'}=~s/^([ \t\f]+)//) {
       if ($prt) {print $1;}
     }
+    if ($tex->{'line'}=~/^\%TC:/i) {return;}
     if ($tex->{'line'}=~s/^(\%+[^\r\n]*)//) {
       print_style($1,'comment');
       $ret=1;
@@ -3204,11 +3349,6 @@
   wprintstringdata('Version');
 }
 
-# Print TeXcount reference text
-sub print_reference {
-  wprintstringdata('Reference');
-}
-
 # Print TeXcount licence text
 sub print_license {
   wprintstringdata('License');
@@ -3215,18 +3355,30 @@
 }
 
 # Print short TeXcount help
-sub print_short_help {
+sub print_help {
   wprintstringdata('ShortHelp');
 }
 
+# Print main TeXcount help
+sub print_help_man {
+  wprintstringdata('HelpTitle');
+  wprintstringdata('HelpText');
+  wprintstringdata('Reference');
+}
+
+# Print help on TC instructions
+sub print_help_tcinst {
+  wprintstringdata('TCinstructions');
+}
+
 # Print TeXcount options list
-sub print_syntax {
+sub print_help_options {
   wprintstringdata('OptionsHead');
-  wprintstringdata('Options','@ -          :');
+  wprintstringdata('Options',StringDatum('OptionsFormat'));
 }
 
-# Prinst TeXcount options containing substring
-sub print_syntax_subset {
+# Print TeXcount options containing substring
+sub print_help_options_subset {
   my $pattern=shift @_;
   my $data=StringData('Options');
   if (!defined $data) {
@@ -3240,29 +3392,10 @@
   if (scalar(@options)==0) {print "No options contained $pattern.\n";}
   else {
     print "Options containing \"$pattern\":\n\n";
-    wprintlines('@ -          :',@options);
+    wprintlines(StringDatum('OptionsFormat'),@options);
   }
 }
 
-# Print complete TeXcount help
-sub print_help {
-  print_help_title();
-  print_syntax();
-  print_help_text();
-  print_reference();
-}
-
-# Print help title 
-sub print_help_title {
-  wprintstringdata('HelpTitle');
-}
-
-# Print help text
-sub print_help_text {
-  wprintstringdata('HelpText');
-  wprintstringdata('TCinstructions');
-}
-
 # Print help on specific macro or environment
 sub print_help_on_rule {
   my $arg=shift @_;
@@ -3326,16 +3459,20 @@
 # Print macro handling rule
 sub _print_rule_macro {
   my ($arg,$def)=@_;
-  if (ref($def) eq 'ARRAY') {
+  if (!defined $def) {
+    print "Takes no parameter(s).\n";
+  } elsif (ref($def) eq 'ARRAY') {
     my $optionflag=0;
-    print "Takes the following parameter(s):\n";
+    print "Takes has the following parameters and parameter rules:\n";
     foreach my $state (@{$def}) {
       if ($state==$_STATE_OPTION) {$optionflag=1;}
+      elsif ($state==$_STATE_NOOPTION) {print " - no [] options permitted here\n";}
+      elsif ($state==$_STATE_AUTOOPTION) {}
       elsif ($optionflag) {
         $optionflag=0;
-        print " - Optional [] containing $state2desc{$state}\n";
+        print " + optional [] containing $state2desc{$state}\n";
       } else {
-        print " - $state2desc{$state}\n";
+        print " + $state2desc{$state}\n";
       }
     }
   } else {
@@ -3348,7 +3485,9 @@
   my ($arg,$def)=@_;
   print "Contents parsed as $state2desc{$def}\n";
   if ($def=$TeXmacro{$PREFIX_ENVIR.$arg}) {
-    _print_rule_macro($def);
+    _print_rule_macro($arg,$def);
+  } else {
+    print "Takes no parameter(s).\n";
   }
 }
 
@@ -3484,6 +3623,8 @@
 .mathcmd {color: #6c0;}
 .ignore {color: #999;}
 .exclenv {color:#c66;}
+.special {color:#c66; font-weight: bold;}
+.specarg {color:#c66; font-weight: bold; font-style: italic;}
 .tc {color: #999; font-weight:bold;}
 .comment {color: #999; font-style: italic;}
 .state {color: #990; font-size: 70%;}
@@ -3490,6 +3631,7 @@
 .cumsum {color: #999; font-size: 80%;}
 .fileinc {color: #696; font-weight:bold;}
 .warning {color: #c00; font-weight: 700;}
+.note {color: #c90; font-weight: bold;}
 
 div.filegroup, div.parse, div.stylehelp, div.count, div.sumcount, div.error {
    border: solid 1px #999; margin: 4pt 0pt; padding: 4pt;
@@ -3546,6 +3688,11 @@
   return STRINGDATA()->{$name};
 }
 
+# First line of StringData
+sub StringDatum {
+  return pop @{StringData(@_)};
+}
+
 # Insert value from GLOBALDATA
 sub __apply_globaldata {
   my $name=shift @_;
@@ -3607,9 +3754,9 @@
   my $ind2=6;
   my $i;
   foreach my $line (@lines) {
-    if ($line=~s/^@//) {
-      $ind2=1+index($line,':');
-      $ind1=1+index($line,'-');
+    if ($line=~s/^@/ /) {
+      $ind1=index($line,'-');
+      $ind2=index($line,':');
       if ($ind1<1) {$ind1=$ind2;}
       next;
     }
@@ -3616,7 +3763,7 @@
     my $firstindent=0;
     if ($line=~s/^(\t|\s{2,})(\S)/$2/) {$firstindent=$ind1;}
     my $indent=$firstindent;
-    if ($line=~/^(.*\S)(\t|\s{2,})(.*)$/) {
+    if ($line=~/^(.*?\S)(\t|\s{2,})(.*)$/) {
       $indent=$ind2;
       if ($1 eq '|') {$line=' ';}
       else {$line=$1.'   ';}
@@ -3639,12 +3786,12 @@
 TeXcount version ${versionnumber}, ${versiondate}.
 
 :::::::::: Reference
-The TeXcount script is copyright of ${maintainer} (${copyrightyears}) and published under the LaTeX Project Public Licence.
-
 Go to the TeXcount web page
     ${website}
-for more information about the script, e.g. news, updates, help, usage tips, known issues and short-comings, or to access the script as a web application. Feedback such as problems or errors can be reported to einarro at ifi.uio.no.
+for more help and information about the script: news, updates, help, usage tips, known issues and short-comings, or to access the script as a web application. Feedback such as problems or errors can be reported to einarro at ifi.uio.no.
 
+The TeXcount script is copyright of ${maintainer} (${copyrightyears}) and published under the LaTeX Project Public Licence.
+
 :::::::::: License
 TeXcount version ${versionnumber}
   
@@ -3660,8 +3807,22 @@
 :::::::::: ShortHelp
 Syntax: texcount.pl [options] files
 
-Use option -help (or just -h) to get help; -help-options (-hopt) to get list of command line options, or -help-options=substring for help on all options containing substring.
+Use option -help (or just -h) to get help. For more detailed help, the following alternatives exist:
+@ -                      :
+  -help-man, -man          Manual with more extensive help
+  -help-rule={macro/envir}    Macro/environment handling rule (backslash needed with macros)       
+  -help-options (-hopt)    Get list of command line options
+  -help-options={substring}    Help on options containing substring
+  -help-styles             List styles which determine how different elements (words, macros, etc) are presented in the verbose output
+  -help-style={style}      Describe a particular style or style category
+  -help-tc, -help-instructions    Help on %TC:instruction for inserting TeXcount instructions into the TeX code.
 
+Help, documentation, FAQ and updates are available from the TeXcount web page:
+    ${website}
+or through running
+    texdoc texcount
+on the command line.
+
 ::::::::::::::::::::::::::::::::::::::::
 :::::::::: HelpTitle
 ***************************************************************
@@ -3671,6 +3832,18 @@
 Count words in TeX and LaTeX files, ignoring macros, tables, formulae, etc.
 
 ::::::::::::::::::::::::::::::::::::::::
+:::::::::: HelpText
+The script counts words as either words in the text, words in headers/titles or words in floats (figure/table captions). Macro options (i.e. \\macro[...]) are ignored; macro parameters (i.e. \\macro{...}) are counted or ignored depending on the macro, but by default counted. Begin-end groups are by default ignored and treated as 'floats', though some (e.g. center) are counted.
+
+Mathematical formulae are not counted as words, but are instead counted separately with separate counts for inlined formulae and displayed formulae. Similarly, the number of headers and the number of 'floats' are counted. Note that 'float' is used here to describe anything defined in a begin-end group unless explicitly recognized as text or mathematics.
+
+The verbose options (-v1, -v2, -v3, showstate) produce output indicating how the text has been interpreted. Check this to ensure that words in the text have been interpreted as such, and that mathematical formulae and text/non-text in begin-end groups have been correctly interpreted.
+
+Summary, as well as the verbose output, may be produced as text (default) or as HTML code using the -html option. The HTML may then be sent to a file which may be viewed with your favourite browser.
+
+Under UNIX, unless -nocol (or -nc) has been specified, the output will be colour coded using ANSI colour codes. Counted text is coloured blue, with headers in bold, and in HTML output caption text is italicised. Use 'less -r' instead of just 'less' to view output: the '-r' option makes less treat text formatting codes properly. Windows does not support ANSI colour codes, and so this is turned off by default.
+
+::::::::::::::::::::::::::::::::::::::::
 :::::::::: OptionsHead
 
 Syntax: texcount.pl [options] files
@@ -3677,7 +3850,7 @@
 
 Options:
 
-:::::::::: OptionsPrefix
+:::::::::: OptionsFormat
 @ -          :
 :::::::::: Options
   -relaxed      Uses relaxed rules for word and option handling: i.e. allows more general cases to be counted as either words or macros.
@@ -3738,10 +3911,13 @@
   -codes        Display output style code overview and explanation. This is on by default.
   -nocodes      Do not display output style code overview.
   -out=         Write output to file, give filename as option value.
+  -out-stderr   Write output to STDERR instead of STDOUT.
   -h, -?, -help, /?    Help text.
-  -h=, -?=, -help=, /?=    Takes a macro or group name as option and returns a description of the rules for handling this if any are defined. If handling rule is package specific, use -incpackage=package name: -incpackage must come before -h= on the command line to take effect.
+  -help-man, -man    Short manual.
+  -h=, -help-rule=    Takes a macro or group name as option and returns a description of the rules for handling this if any are defined. If handling rule is package specific, use -incpackage=package name: -incpackage must come before -h= on the command line to take effect.
   -help-options, -h-opt    List all options.
   -help-options=, -h-opt=   List all options containing the provided string, e.g. -h-opt=dir or -h-opt=-v (the initial - in -v causes only options starting with v to be listed).
+  -help-tc, -help-inst    List all TeXcount instructions insertable as %TC comments in the TeX document.
   -help-style   List the styles and style categories: i.e. those permitted used with -v={styles-list}.
   -help-style=   Give description of style or style category.
   -ver, -version    Print version number.
@@ -3748,17 +3924,6 @@
   -lic, -license, -licence    Licence information.
 
 ::::::::::::::::::::::::::::::::::::::::
-:::::::::: HelpText
-The script counts words as either words in the text, words in headers/titles or words in floats (figure/table captions). Macro options (i.e. \\macro[...]) are ignored; macro parameters (i.e. \\macro{...}) are counted or ignored depending on the macro, but by default counted. Begin-end groups are by default ignored and treated as 'floats', though some (e.g. center) are counted.
-
-Mathematical formulae are not counted as words, but are instead counted separately with separate counts for inlined formulae and displayed formulae. Similarly, the number of headers and the number of 'floats' are counted. Note that 'float' is used here to describe anything defined in a begin-end group unless explicitly recognized as text or mathematics.
-
-The verbose options (-v1, -v2, -v3, showstate) produces output indicating how the text has been interpreted. Check this to ensure that words in the text has been interpreted as such, whereas mathematical formulae and text/non-text in begin-end groups have been correctly interpreted.
-
-Summary, as well as the verbose output, may be produced as text (default) or as HTML code using the -html option. The HTML may then be sent to file which may be viewed with you favourite browser.
-
-Under UNIX, unless -nocol (or -nc) has been specified, the output will be colour coded using ANSI colour codes. Counted text is coloured blue with headers are in bold and in HTML output caption text is italicised. Use 'less -r' instead of just 'less' to view output: the '-r' option makes less treat text formating codes properly. Windows does not support ANSI colour codes, and so this is turned off by default.
-
 :::::::::: TCinstructions
 Parsing instructions may be passed to TeXcount using comments in the LaTeX files on the format
 @ -      :
@@ -3766,14 +3931,14 @@
 and are used to control how TeXcount parses the document. The following instructions are used to set parsing rules which will apply to all subsequent parsing (including other files):
   %TC:macro [macro] [param.states]
     |    macro handling rule, no. of and rules for parameters
-  %TC:macroword [macro] [number]
-    |    macro counted as a given number of words
+  %TC:macrocount [macro] [number]
+    |    macro counted as a given number of words (alternative: %TC:macroword)
   %TC:header [macro] [param.states]
-    |    header macro rule, as macro but counts as one header
+    |    header macro rule, as macro but counts as one header (deprecated, use instead: %TC:macro \macro [header])
   %TC:breakmacro [macro] [label]
     |    macro causing subcount break point
-  %TC:group [name] [param.states] [content-state]
-    |    begin-end-group handling rule
+  %TC:envir [name] [param.states] [content-state]
+    |    \begin-\end environment handling rule (alternative: %TC:group)
   %TC:floatinclude [macro] [param.states]
     |    as macro, but also counted inside floats
   %TC:preambleinclude [macro] [param.states]
@@ -3788,6 +3953,7 @@
   %TC:incbib                include bibliography (same as running with -incbib)
   %TC:ignore                ignore region, end with %TC:endignore
   %TC:insert [code]         insert code for TeXcount to process as TeX code
+  %TC:subst [from] [to]     replace string throughout document
   %TC:newtemplate           start a new template, ie delete the existing one
   %TC:template [template]   add another line to the template specification
 See the documentation for more details.

Deleted: trunk/Master/texmf-dist/doc/support/texcount/QuickReference.pdf
===================================================================
(Binary files differ)

Deleted: trunk/Master/texmf-dist/doc/support/texcount/QuickReference.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/QuickReference.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/QuickReference.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,49 +0,0 @@
-\documentclass{article}
-\usepackage[T1]{fontenc}
-\usepackage{a4wide}
-\usepackage{times}
-
-\include{macros}
-
-
-\title{%
-\TeXcount{} Quick Reference Guide\\
-Version \version\copyrightfootnote
-}
-
-\begin{document}
-
-\maketitle
-
-\section{Command line options}
-
-Syntax for running \TeXcount{}:
-\codeline{texcount \textit{[options] [files]}}
-where \code{texcount} refers to the TeXcount Perl-script, and the options may be amongst the following:
-
-\input{sub_options}
-
-
-\section{\TeXcount{} instructions embedded in \LaTeX{} documents}
-
-Instructions to \TeXcount{} can be given from within the
-\LaTeX{} document using \LaTeX{} comments on the format
-\codeline{\%TC:\textit{instruction [name] parameters}}
-where the name is use for instructions providing macro handling rules to give the name of the macro or group for which the rule applies.
-%
-\input{sub_tc_other}
-
-Instructions for adding macro handling rules all take the format
-\codeline{\%TC:\textit{instruction name parameters}}
-where the name indicates the macro (with backslash) or group name for which the rule applies:
-%
-\input{sub_addrules}
-
-The available parser rules for environment contents and macro parameters are \code{word}/\code{text}, \code{headerword}, \code{otherword}, \code{header}, \code{float}, \code{inlinemath}, \code{displaymath}, \code{ignore}, \code{xx} (strong exclude), \code{xxx} (stronger exclude), \code{xall} (exclude all) or any of their aliases.
-
-The available counters are \code{word}/\code{text}, \code{headerword}, \code{otherword}, \code{header}, \code{float}, \code{inlinemath}, \code{displaymath} or any of their aliases.
-
-Available file specifications contain one or more of \code{input} (for \code{\bs{input}}), \code{file} (file path), \code{texfile} (use with \code{\bs{include}}), \code{<bbl>} (to include the bibliography file), \code{dir} and \code{subdir}. The \code{dir} and \code{subdir} are used to modify the search path within the included document (used with the \code{import} package).
- 
-
-\end{document}

Deleted: trunk/Master/texmf-dist/doc/support/texcount/README
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/README	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/README	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,9 +0,0 @@
-TeXcount is a Perl script that counts the number of words in the 
-text of LaTeX files. It has rules for handling most of the common 
-macros and provides colour coded output indicating which parts have 
-been counted. Go to
-  http://app.uio.no/ifi/texcount/
-for more information or to access the script online as a web service.
-
-The package, i.e. the script and all accompanying files, is
-distributed under the LaTeX Project Public License.

Added: trunk/Master/texmf-dist/doc/support/texcount/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/README.md	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/README.md	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,37 @@
+TeXcount
+========
+
+TeXcount is a Perl script that counts the number of words in the 
+text of LaTeX files. It has rules for handling most of the common 
+macros and provides colour coded output indicating which parts have 
+been counted.
+
+
+# Web site and online web service
+
+Go to
+  http://app.uio.no/ifi/texcount/
+for more information or to access the script online as a web service.
+
+
+# Documentation
+
+There is documentation at different levels of detail and technicality:
+
+* **QuickReference** gives a quick overview of options and TeXcount instructions which may be used to control how TeXcount processes TeX documents.
+
+* **TeXcount** is the main documentation giving a detailed explanation of how TeXcount processes TeX documents, and how to use options and TeXcount instructions to customise the word counting and reporting the results.
+
+* **TechDoc** is the technical documentation of the internals of TeXcount.
+
+Additional documentation, including scripts and examples, is available on the TeXcount web site:
+  http://app.uio.no/ifi/texcount/
+
+Questions posted to StackExchange and tagged *texcount* will automatically notify the TeXcount developer:
+  https://tex.stackexchange.com/questions/tagged/texcount
+
+
+# License
+
+The package, i.e. the script and all accompanying files, is
+distributed under the LaTeX Project Public License.


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/README.md
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Deleted: trunk/Master/texmf-dist/doc/support/texcount/TeXcount.pdf
===================================================================
(Binary files differ)

Deleted: trunk/Master/texmf-dist/doc/support/texcount/TeXcount.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/TeXcount.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/TeXcount.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,727 +0,0 @@
-\documentclass{article}
-\usepackage[T1]{fontenc}
-\usepackage{a4wide}
-\usepackage{times}
-\usepackage{listings}
-\usepackage{url}
-
-%% TeXcount parsing rules
-%TC:envir lstlisting [] xall
-%TC:macro url [ignore]
-
-\include{macros}
-
-%\parindent=0pt\parskip=8pt
-\lstset{basicstyle=\ttfamily\scriptsize,xleftmargin=2cm,xrightmargin=2cm}
-\lstset{basicstyle=\ttfamily\scriptsize,xleftmargin=2cm,xrightmargin=2cm}
-
-\title{%
-\LARGE
-\TeXcount\\
-\Large
-Perl script for counting words in \LaTeX{} documents\\
-Version \version\copyrightfootnote
-}
-\author{Einar Andreas R{\o}dland}
-
-\begin{document}
-
-\maketitle
-
-{\abstract%
-\TeXcount{} is a Perl script for counting words in \LaTeX{} documents. It recognises most of the common macros, and has rules for which parameters to count and not to count; the main text is counted separately from the words in headers and in captions of figures and tables. Finally, it produces a colour coded version of the parsed document, either as a text document or as HTML to be viewed in a browser, indicating which parts of the document have been included in the count.
-%
-}
-
-{\scriptsize\tableofcontents}
-
-\pagebreak
-
-
-% ---------------------------------------------------------------------------
-
-\section{What \TeXcount{} does}
-
-\TeXcount{} is a Perl script made for counting the words in a \LaTeX{} document. Since \LaTeX{} documents are formated using lots of macro instructions and often contain both mathematical formulae and floating tables and figures, this is no trivial task.
-
-Simple solutions to counting the words consists of detexing the documents, which often merely consisty of ignoring the \TeX{} and \LaTeX{} instructions. This is a bad solution since it will usually result in over-estimating the number of words as mathematical formulae, citations, labels and references are counted.
-
-A perfect solution, if such exists, needs to take into account how \LaTeX{} interprets each macro instruction. The simplest approach to taking this into account consisty of making the count based on the typeset document, but this too tends to over-estimate the word count as mathematical formulae, table contents and even page numbers may get counted.
-
-A simple but robust approach, which is the one I have taken with \TeXcount{}, is to parse the \LaTeX{} document using simple rules for how to interpret the different \TeX{} and \LaTeX{} instructions. Rules for most of the common macro instructions are included in \TeXcount{}, and it is possible to specify new rules in the \TeX{} document.
-
-The primary focus of \TeXcount{} is to:
-\begin{itemize}
-
-\item provide an accurate count of the number of words in \LaTeX documents;
-
-\item exclude or count separately document elements which are not part of the main text such as figure captions;
-
-\item enable the user to, with relative ease, check how \TeXcount{} has parsed the document and which elements have been counted and which have not.
-
-\end{itemize}
-The last point on this list is one of the most important. Having an accurate word count is of little value unless you know that it is accurate; conversly, trusting an inaccurate word count can be potentially harmful, e.g. if you are submitting a paper or a report which has a strict word limit.
-
-\TeXcount{} handles complete \LaTeX{} documents, i.e. that start with \code{\bs{documentclass}} and has the text between \code{\bs{begin}\{document\}} and \code{\bs{end}\{document\}}, as well as partial documents made to be included in another \LaTeX{} document. However, in either case, it requires that all groups are closed: \code{\{\ldots\}} and \code{\bs{begin}\ldots\bs{end}}.
-
-Automatic parsing of included documents is possible, but is by default turned off. There are two options for turning this on: \code{-inc} and \code{-merge}. Turning it on using \code{-merge} will merge the included files into the main document. By using \code{-inc}, however, the included files are parsed separately rather than include the text into the appropriate location: this will perform a separate word count of the included document which is then later included in the total sum.
-
-Since \TeXcount{} relies on a relatively simple rules for handling the different macros and only performs limited checks on the validity of the \LaTeX{} document, it is your responsibility to make sure the document actually typesets in \LaTeX{} before running it through \TeXcount{}. Also, \TeXcount{} relies on handling macros taking parameters enclosed with \{ and \}, and on ignoring options enclosed by [ and ]: macros with significantly different syntax such as \code{\bs{vskip}} cannot be handled. There are also limitations on what may be contained in macro options enclosed in [], although this restriction may be relaxed by specifying the command line option \code{-relaxed}.
-
-
-\subsection{What \TeXcount{} counts}
-
-Basically, \TeXcount{} has seven different counts plus an additional file count for use with total counts over a set of files. These and their indices (numbers used to identify them) are:
-\begin{description}
-\item[0. Number of files:] When multiple files are included, this is counted.
-\item[1. Text words:] Words that occur in the main text.
-\item[2. Header words:] Words that occur in headers, e.g. \code{\bs{title}} and \code{\bs{section}}.
-\item[3. Caption words:] Words that occur in figure and table captions.
-\item[4. Header count:] This counts the number of headers, i.e. each \code{\bs{section}} counts as 1.
-\item[5. Figure/float count:] This counts the number of floats and figures, e.g. \code{table} and \code{figure} environments.
-\item[6. Inline formulae:] This counts the number of inline formulae, i.e. \code{\$\ldots\$}.
-\item[7. Displayed formulae:] This counts the number of displayed formulae, e.g. \code{\bs{[}\ldots\bs{]}} or \code{equation} environments.
-\end{description}
-These are stored in an array and sometimes referenced by their index: e.g. in the option \code{-sum=} which takes parameter values corresponding to counts 1 to 7. In other contexts, however, like in the \code{-tempate=} or when incrementing specific counters through the \code{\%TC:macrocount} instruction, the counters may be referred to by keywords rather than the indices 0 to 7.
-
-The primary role is to count the words. It is not entirely clear what should be considered words, so I have had to make some decisions. A sequence of letters is certainly a word. \TeXcount{} also counts acronyms like \textit{e.g.}, dashed words like \textit{over-all}, and \textit{it's} as one word. It also counts numbers as words unless they are placed in a math group. If \TeXcount{} breaks words that contain special characters, you may try the option \code{-relaxed} which extends the range of characters allowed as part of words.
-
-Alternatively, \TeXcount{} may be asked to count the number of letters/characters (not including spaces). It may also be set to count Chinese or Japanese characters.
-
-Mathematical formulae are not counted as words: it would be difficult to define a sensible rule for this. Instead, \TeXcount{} counts the number of inline formulae and displayed formulae separately. You may then decide on how to combine these counts with the word counts, e.g. using the \code{-sum} option.
-
-Text in headers (\code{\bs{title}}, \code{\bs{section}}, etc.) are counted separately: \TeXcount{} counts the number of headers as well as the number of words in headers. It may also provide subcounts for each of these by specifying the \code{-sub} option.
-
-Floating environments (or potentially floating environments) such as tables and figures are not counted as text, even if the cells of a table may containt text. However, if they have captions, these will be counted separately much like headers were. Footnotes are included in this count. By default, environments do not modify the parsing state: i.e. environments within the text are counted as text, etc. Rules for the most common environments, at least those that require non-default treatment, should be predefined, but you may have to add more rules if you use environments defined in packages or by yourself. If you wish to be warned against any environments names you use that lack a defined rule, set the option \code{-strict}.
-
-Some macros are words by themselves: e.g. \code{\bs{LaTeX}}. These are counted as words provided the macro word rule has been defined for them, but you cannot expect \TeXcount{} to count something like \code{\bs{LaTeX}-word} or \code{\{\bs{TeX}\}count} as one word although the above explanation inicates that it should: \TeXcount{} will in both cases evaluate the macro and the following text separately and thus count them as separate entities. Since \TeXcount{} recognises \code{\bs{LaTeX}} and \code{\bs{TeX}} as single words, each of the two examples would end up being counted as two words.
-
-
-\subsection{What \TeXcount{} does not do}
-
-While an ideal solution should be able to expand the macro instructions, thus being able to handle new macros, that would at it's worst require reimplementing much of \TeX{}, something that is clearly unrealistic. Instead, I have opted for a simpler solution: to define rules stating which paramters to count and which to ignore and allowing for such rules to be added easily. Thus, \TeXcount{} cannot handle macros that may take a variable number of parameters. Nor can it handle macros that takes parameters on forms other than \code{\{parameter\}}. However, support has now been added for macro options on the form \code{[\ldots]} to be parsed.
-
-In general, while \TeXcount{} does the parsing in some detail, it does not do it exacly as \TeX{} does it. In some respects there may therefore be critical differences: e.g. while \TeX{} reads one character at a time, \TeXcount{} reads one word at a time, so while \LaTeX{} would interpret \code{\bs{cite} me} as \code{\bs{}cite\{m\}e}, \TeXcount{} would interpret it like \code{\bs{cite}\{me\}}.
-
-Another issue is that, since \TeXcount{} does not know how to expand macros, it cannot handle macros like \code{\bs{maketitle}} that add text to the document. With respect to \code{\bs{maketitle}}, I have instead set the rule for \code{\bs{title}\{title text\}} to count this as a header although it does not itself produce any text.
-
-
-\subsection{Problems to be aware of}
-
-In most large documents, there will be cases where \TeXcount{} does not give an exact count. Reasons may be macros \TeXcount{} does not recognise, words that \TeXcount{} split in two (or more) because of special characters not recognised as letters, or options and parameters not counted which actually produce text. Some problems may also arise because it is not always clear what should be counted and \TeXcount{} implements one particular choice: counting numbers as letters/words, not counting formulae as words, not to count tables as text, etc. However, hopefully these should either consist of individual, infrequent errors which should have limited effect on the total count, or entire regions that are included or excluded for which the user may change the parsing rule to produce the desired count.
-
-There are, however, problems that may arise which are more fundamental and result in counts which are simply wrong rather than just inaccurate, or even make \TeXcount{} fail entirely.
-
-If \TeXcount{} fails to detect environment endings properly, either closing \code{\{} or \code{\bs{end}}, it may end up ignoring major parts of the document. This should normally produce errors of some kind, although there may be cases when no errors are produced. However, by looking at the verbose output, it will be very clear that entire parts of the document has been excluded. Such problems may be cause by macros that allow unmatched group delimiters, and some effort has been made to minimise the risk of this at the cost of risking other but less critical errors: e.g. there are limits to what is permitted as macro options in order to ensure that a single unmatched \code{[} does not cause large parts of the document to be interpreted as a big option.
-
-For users of languages containing letters other than the Latin letters A to Z, there is a risk that \TeXcount{} may have difficulty identifying words correctly. The script relies on Perl to recognise words as sequence of letters, and must therefore know which characters are considered to be letters. Words containing letters not recognised by \TeXcount{} will tend to be split into two or more words, which can dramatically inflate the word count. The first step is to ensure that the file is read using the correct encoding: I generally suggest using the UTF-8 Unicode encoding, and from version 2.3. this is the default encoding used by \TeXcount{}, although other encodings may also be used. Unicode has good annotation of which characters are letters, and starting with version 2.3, \TeXcount{} uses Unicode internally to represent the text.
-
-While non-Latin letters like \code{\aa} and \code{\"a} should be recognised as letters, \TeX/\LaTeX codes using macros or special characters, such as \code{\bs{aa}} and \code{\bs{"}a}, are not immediately understood as letters. I have added patterns aimed at recognising these as well, but depending on the code you are writing, these patterns may either not be flexible enough to recognise all letter codes, or may be too flexible and recognise things it should not. I have added a relaxed mode (\code{-relaxed}) and a more restricted mode (\code{-restricted}) in which these patterns are more general or more constrained, but you should check how this performs on you actual texts by viewing the verbose output.
-
-
-
-% ---------------------------------------------------------------------------
-
-\section{Syntax and options}
-
-
-\subsection{Running \TeXcount{}}
-
-The command to run \TeXcount{} may vary slightly depending on the operating system and the local settings. You may also wish to rename it or define an alias.
-
-Under Windows, running \code{texcount.pl} from the command line suffices if \code{texcount.pl} is in the path and pl-files are defined to run as Perl scripts.
-
-Under Linux/Unix, it should be sufficient to run \code{texcount.pl} provided it is in the PATH and has been made executable (\code{chmod u+x texcount.pl}). The first line of the file contains the line \code{\#!/usr/bin/env perl} which should find the correct location for \code{perl} (provided the program \code{/usr/bin/env} is available). If not, run \code{which perl} to locate Perl and replace the first line of the script with \code{\#!\textit{path}}.
-
-Alternatively, if the above methods do not work, you may have to run \TeXcount{} exclicitly in Perl by executing \code{perl texcount.pl}. You then need to have the \code{perl} executable in the path or give the explicit path.
-
-For simplicity, I will simply write \code{texcount.pl} in this manual for the code to execute the script. The syntax then becomes
-\codeline{texcount.pl \textit{[options] [files]}}
-where the options may be amongst the following:
-
-\input{sub_options}
-
-If more than one file is given, \TeXcount{} will perform the count on each of them printing the results for each, then print the total sum at the end. Note that files are parsed one by one in order of appearance and counts made per file; only afterwards are the totals computed. 
-
-
-\subsection{File encoding}
-
-If your \TeX/\LaTeX{} document consists entirely of ASCII characters, there should be no problems with file encoding. However, if it contains non-ASCII characters, e.g. non-Latin letters such as \o{}, there are different ways in which these may be encoded in the files.
-
-The main encoding supported by \TeXcount{} is UTF-8 (Unicode), and this is used to represent text internally in \TeXcount{}. In older versions of \TeXcount{}, Latin-1 (ISO-8859-1) was the default encoding, but this may cause problems when using non-Latin characters.%
-\footnote{%
-In Perl, which \TeXcount{} is written in, Latin-1 is the default. However, starting with version 2.3, \TeXcount{} has switched to using UTF-8 (Unicode) internally and will convert text to Unicode before processing: in older version, internal representation was UTF-8 or Latin-1 depending on the options used.}
-Both of these are compatible with ASCII: i.e. both are extensions of ASCII, so ASCII characters will be treated correctly by both encodings, but non-ASCII characters will be treated differently.
-
-From version 2.3 of \TeXcount{}, it is possible to specify other encodings using the \code{-encoding=} option. If no encoding is specified, \TeXcount{} will guess which encoding is used. By default, this guessing is limited to ASCII, UTF-8 and Latin-1. If other encodings are used, the automatic guessing is likely to pick Latin-1 since most files would result in valid Latin-1 code. If the \code{-chinese} or \code{-japanese} option is set, it will guess at other encodings, but still with UTF-8 as the first choice.
-
-I generally recommend using UTF-8 Unicode: this is increasingly becoming the new standard. Basically, Unicode contains the characters needed for all existing languages, enumerated from 0 and upwards (beyond 100000), which resolves the problem of requiring different character sets. Since there are more than 256 characters in Unicode, Unicode cannot be represented using one byte per character: UTF-8 is a way to encode the Unicode characters into a list of bytes so that ASCII characters (no. 0--127) are represented by one byte (same as in ASCII), while non-ASCII characters are represented using two or more bytes. Unicode may also be encoded using two bytes to represent each of Unicode characters 0--65535, which covers most of practical use, but this is less commonly used as a file format: it is, however, common for internal representation of strings in memory, as done by e.g. Java, so Perl is the odd one out in using UTF-8 for internal string representation.
-
-If an encoding is specified using the \code{-encoding=} option, the input will be decoded from the specified encoding into UTF-8. If HTML output is specified, the output will be UTF-8. This ensures that all HTML produced is UTF-8, which is also the encoding specified in the HTML header. If text output is used, the specified encoding is used for the output. E.g. if you specify \code{-encoding=latin1}, \TeXcount{} will assume that all files are encoded in Latin-1, and will also produce the detailed output using Latin-1. For piping, i.e. option \code{-}, this is useful as it ensures the output has the same encoding as the input.
-
-For convenience, if no encoding is specified, \TeXcount{} will try to guess which encoding is the appropriate one. This is done simply by checking a specified list of encodings one by one until one is found that fits the text. The default is to check ASCII, then UTF-8, and finally Latin-1. If none fits, \TeXcount{} should try to decode the ASCII part of the text replacing non-ASCII characters with a wildcard character, although there may be cases when the decoding exits upon hitting an error. If Chinese or Japanese languages are specified, UTF-8 is tried first, then other encodings are checked depending on the language.
-
-Note that if no encoding is specified and \TeXcount{} is left to guess the appropriate encoding, all output will be UTF-8. Thus, letting \TeXcount{} guess the encoding may not be suitable when using \TeXcount{} in a pipe since the UTF-8 output may not be compatible with the encoding of the input. If multiple files are parsed, \TeXcount{} will guess the encoding separately for each file even if they are included (\code{-inc} or \code{-merge}) in a file with an identified encoding, and may thus end up selecting different encodings for different files.
-
-
-\subsection{Language scripts, alphabets and character sets}
-
-In addition to the traditional Latin letters, A-Z, a number of letters are recognised by Unicode as part of the extension of the Latin letters. Some languages, however, use entirely different character sets.
-
-By default, \TeXcount{} has been set up to recognise all alphabets. However, there is a distinction between alphabets like the Latin, Greek, Cyrillic, etc. in which words consist of multiple letters, and languages like Chinese in which each character should be counted as a word. For simplicity, we refer to these as \emph{alphabetic} characters and \emph{logograms}.\footnote{%
-Actually, these names are not completely accurate. A logogram is a script which represents a word or `meaningful unit', but e.g. the Japanese kana and Korean hangul are counted as words although they represent sound or syllables rather than meanings.}
-The options \code{-alphabets=} and \code{-logograms=} (or \code{-alpha=} and \code{-logo=} for short) allow you to specify which characters to use as either alphabetic letters or whole word characters. These take values that consist of Unicode properties separated by \code{,} or \code{+}. The default setting corresponds to
-\codeline{-alphabets=Digit,alphabetic}
-in which \code{alphabetic} is defined by \TeXcount{} as the Unicode \code{Alphabetic} class minus logographic script classes, and
-\codeline{-logograms=Ideographic,Hiragana,Katakana,Thai,Lao}
-which should cover Chinese characters (\code{Han}) as well as the Japanese characters (\code{Han} for the kanji, \code{Hiragana} and \code{Katakana} for the kana). Both options remove previous script settings, unless the list is prefixed by \code{+} in which case the scripts are added: e.g. \code{-logograms=+cjkpunctuation} will add the CJK punctuation characters (defined by \TeXcount) to the set of counted characters.
-
-Applicable Unicode properties/scripts include \code{Digit}, \code{Latin}, \code{Greek}, \code{Cyrillic}, \code{Hebrew}, \code{Arabic}, \code{Han}, \code{Katakana}, \code{Hiragana}, and more.\footnote{A more complete overview is available at Wikipedia: \url{http://en.wikipedia.org/wiki/Script_(Unicode)}.}
-
-In addition to the Unicode properties, \TeXcount{} has added a few additional character groups. The properties \code{alphabetic}, \code{digit} and \code{alphanumeric} are more restrictive than their Unicode name-sakes: \code{alphabetic} excludes the default logographic character sets, and \code{digit} consists only of 0--9 unlike Unicode \code{Digit} which includes numerals from other scripts. There is also \code{cjkpunctuation} which is intended to identify Chinese/Japanese/Korean punctuation.
-
-Note that the Unicode properties are case sensitive. The native Unicode properties start with capital letters, whereas the properties defined by \TeXcount{} are all lower case. Invalid properties will be ignored.
-
-The options \code{-chinese} and \code{-japanese} still exist and simply restrict the logographic character sets. In addition, \code{-chinese-only} and \code{-japanese-only} will exclude alphabetic words from the counting, equivalent to \code{-alphabets=} with no script properties given. In addition, these options will change the lists of file encodings \TeXcount{} will try if no encoding is given.
-
-The option \code{-stat} has been added to produce overall word counts per script type. This uses the character classes specified in the \code{-alphabets=} and \code{-logograms=} options, so the default will be able to count which words are purely alphabetic and which contain numbers (or a combination of both), but will not distinguish between e.g. Latin and Greek. To do that, you would have to specify the script classes: e.g.
-\codeline{-alphabets=digit,Latin,Greek,Cyrillic}
-will count words containing the numbers 0--9, Latin letters (including the extended Latin character set), Greek letters and Cyrillic letters. Words may contain any combination of these: \TeXcount{} does not require that a word consist of only one type of script. Also, note that if \code{digit} had not been included, numbers would not be allowed to be part of or counted as words. The output statistics will then give the number of words containing each of these script classes (or combination).
-
-
-\subsection{Parsing details}
-
-By selecting one of the \code{-v} options, you can choose how much detail is printed. This is useful for checking what \TeXcount{} counts. Alternatives \code{-v0} to \code{-v4} control the amount of detail, with \code{-v} equal to \code{-v3}. The option \code{-showstate} shows the internal state and is for debugging purposes only: \code{-v4} switches this on.
-
-The output is colour coded with counted text coloured blue, other colours for other contexts. The colour coding is made using ANSI colour codes. These should work when printed directly to a Linux xterm window, but need not work if piped through \code{more} or \code{less}: with \code{less} you need to use the option \code{-r} for the colours to be shown correctly.
-
-Under Windows or other operating systems, regular ANSI colour codes don't work, but there is a fix in place which adapts it for Windows, although this may not function exactly as desired.
-
-In general, however, I recommend using HTML output which can be viewed in a browser: in particular if the text output does not produce suitable colour coding.
-
-To print the details encoded as HTML document, use the option \code{-html}. Alternatively, \code{-htmlcore} only outputs the HTML body. I suggest using the options \code{-html -v} to get full detail, save this to a HTML file, e.g. using
-\codeline{texcount.pl -html -v -sum \textit{files} > details.html}
-where \code{-sum} computes the total count of words and formulae (or \code{-sum=1,1,1} to only count words) and adds the cumulative count at the end of each line of the parsing details, and \code{-sub} is on by default which produces subcounts per section.
-
-\subsubsection{Control of details in verbose output}
-
-The verbosity option, \code{-v=\parm{styles-list}} or \code{-v\alt{0-4}\parm{styles-list}}, can be used to select exactly which elements to include or exclude from the verbose output. The styles list consists of a list of styles or style categories with \code{+\parm{style}} or \code{-\parm{style}} used to indicate if they should be added or removed. If the first style in the list is one of the categories 0 to 4, the \code{=} can be dropped. The option \code{-help-style} returns an overview of the available styles and style categories, while \code{-help-style=\parm{style}} may be used to get a description of a particular style or style category.
-
-Each token in the verbose output has a defined style: e.g. \code{word}, \code{hword} (header word), \code{ignore}, \code{option}, etc. If the style is included in the styles list, it will be printed in the verbose output; if not included in the styles list, it will not be printed. Thus, by setting which styles are included in the styles list, you can specify in detail which tokens are included in the verbose output. The included styles correspond to the list of colour codes listed at the start of the output when \code{-codes} is set.
-
-The style categories, which include \code{0} to \code{4}, are groups of related styles: e.g. \code{Words}, \code{Macros}, \code{Options}, etc. Note that apart from \code{0} to \code{4}, the style categories have capital initials, while the styles themselves are all lower case.
-
-For example, if you only want to output words (including those in headers and other contexts), you can set the option \code{-v=Words}; using \code{-v=Words+math}, the equation contents will be included (but not the enclosing \code{\$\ldots\$}).
-
-
-\subsection{Summary information}
-
-By default, \TeXcount{} outputs counts of text words, header words, caption words, number of headers, number of floats/figures, number of inlined formulae, and number of displayed formulae, and lists each of these counts. To shorten this to a one-line format per file, specify \code{-brief}.
-
-To get \TeXcount{} to produce a total count, specify \code{-sum}: this will compute the sum of all words plus the number of formulae. A customized sum may be computed by specifying \code{-sum=n,n,\ldots} with up to seven numbers separated by commas giving the weight (0=don't count, 1=count once) of each of the seven counts: e.g. the default is equivalent to \code{-sum=1,1,1,0,0,1,1}. To count words only, use \code{-sum=1,1,1}. Higher weights may also be used, e.g. to count displayed formulae or floats/figures as a given number of words.
-
-Specifying \code{-sum} has two main effects: the cumulative sum is added to the output in verbose formats, and the sum is added to the summary. If combined with \code{-brief}, the option \code{-total} is automatically set, resulting in a one line output containing only the total sum.
-
-For adding subcounts e.g. by sections, the option \code{-sub} (or \code{-subcount}) may be used. By default, this produces subcounts by part, chapter, section and subsection which are listed in a brief format. One may, however, specify \code{-sub=} followed by \code{part}, \code{chapter}, \code{section}, or \code{subsection} (default when given without value). Break points which initiate a new subcount may also be specified within the \LaTeX{} document using \code{\%TC:break name}.
-
-If included files are included in the count (\code{-inc}), counts per file will be produced followed by a total count. Note that the counts for the included files are not included in the counts for the main document, and in particular are not included in the subcounts (e.g. per section). To suppress per file counts, the option \code{-total} may be used.
-
-By adding the option \code{-freq}, \TeXcount{} will output the word frequencies in order of descending frequency: this is only done for the total count, not per file. You may restrict the frequency table to words occurring at least $n$ times by specifying \code{-freq=\it{n}}. \TeXcount{} will count words irrespective of case, but the output will retain upper case where this is consistently used. Note that \TeXcount{} may not recognise that words are the same if they are written differently in the code, e.g. \code{{U}pper} and \code{Upper}.
-
-A frequency table for each script type (alphabetic, Han, etc. or script classes like Greek, Hebrew etc. if specified in \code{-alphabets=}) is produced by the option \code{-stat}. 
-
-
-\subsection{Parsing options}
-
-\TeXcount{} uses regular expressions to identify words and macro options. By default, these have been set so as to fit most common usages. However, some users may find the default to be too strict, e.g. not recognising options that are long and contain less common symbols. More permissive patterns may be selected by using the option \code{-relaxed}. This allows more general document elements to be identified as words or macro options, which may sometimes be desired, but may also have undesirable effects, so check the verbose output to verify that \TeXcount{} has counted the appropriate elements. Conversely, if the default settings tend to combine words that should be counted as separate words, you may try the option \code{-restricted}.
-
-Macro options, i.e. \code{[\ldots]} after macros and macro parameters are ignored. Since \TeXcount{} has no specific knowledge of which macros take options, this is a general rule applied to all macros that take parameters\footnote{For macros that take no parameters, \code{[\ldots]} is not interpreted as a macro option. While slightly inconsistent, this avoids e.g. \code{\{\bs{bf}[bold text]\}} being gobbled up as a macro option and ignored}. In order to avoid that uses of [\ldots] that are not macro options are mistaken as such, \TeXcount{} makes some restrictions on what may be contained in such an option. By default, this restriction is relatively strict under the assumption that it is better to count a few macro options as words than risk large fragments of text to be ignored. However, if your document contains macro options with more complicated values (e.g. certain special characters or macros), using \code{-relaxed} may help handle these correctly.
-
-By default, \TeXcount{} does not allow special characters or macros to be part of words. This may cause problems if character modifiers or some special characters are used which are entered as macros. The \code{-relaxed} option makes the word recognition regular expression somewhat more general.
-
-
-\subsection{File inclusion}
-
-By specifying \code{-inc} or \code{-merge}, \TeXcount{} will automatically count documents that are included using \code{\bs{input}} or \code{\bs{include}}. The difference between the two is that \code{-inc} analyses the included files separately, while \code{-merge} merges the included documents into the parent document. Thus, \code{-inc} will result in one count per file and a total sum at the end, while \code{-merge} will treat the merged document as if it was one file.
-
-The default option is \code{-noinc} indicating that included documents are not counted.
-
-Paths can be absolute or relative. Relative paths are by default relative to the working directory, although e.g. the \code{import} package can cause files to be included from other directories. The working directory is by default set to be the current directory: i.e. the directory from which \TeXcount{} is executed. This default behaviour corresponds to the option \code{-dir=.}.
-
-The working directory can be specified explicitly by the \code{-dir=\parm{path}} option. The file names on the command line should still be relative to the current directory, i.e. the one from which \TeXcount{} is executed, while files included within these will be relative to the specified working directory.
-
-Alternatively, if \code{-dir} is used without setting a path, the working directory is determined by the directory containing the top level \LaTeX{} documents, i.e. the document specified on the command line; if several files are provided on the command line, these may result in different working directories. Note that \code{-dir} and \code{-dir=} are fundamentally different: the first indicates that the working directory is determined by the top level \LaTeX{} documents, while the second fixes the working directory to be the current directory.
-
-Note that when included documents are parsed as separate files, i.e. using \code{-inc}, the text of included documents is not included where the \code{\bs{input}} or \code{\bs{include}} is located. This has two consequences. First, since word counts are produced per file, subcounts, e.g. by chapter, will only include the text in the same file, not that of the included file. Secondly, if TC-instructions to \TeXcount{} are embedded in the \LaTeX{} document, e.g. defining additional macro handling rules, these take effect in the order they are parsed by \TeXcount{}. Since included documents are parsed after the parent document, definitions in the parent document will be in effect for the included documents; definitions made in the included documents will only be in effect for subsequently included documents, not in the parent or previously included documents.
-
-In addition to the \code{-dir} option for setting the working directory, there is a similar option \code{-auxdir} for setting the path to the auxiliary directory where e.g. the bibliography \code{.bbl} file should be read from. The default setting is \code{-auxdir} which means that the working directory is used. However, \code{-auxdir=\parm{path}} can be used to overrule this and set an alternative path. If \code{-dir=\parm{path}} is used, the auxiliary path should be relative to the current directory, not to the working directory specified with \code{-dir=\parm{path}}; if \code{-dir} is used, as is the default, the working directory will be the directory containing the top level \LaTeX{} documents (the ones specified on the command line), and the auxiliary path will be relative to this, unless an absolute path is specified.
-
-
-
-% ---------------------------------------------------------------------------
-
-\section{Macro handling rules}
-
-A few special macro handling rules are hard-coded into the \TeXcount{} script: i.e. the handling of those can only be changed by editing the script. However, \TeXcount{} primarily relies on a few general rules and macro and environment handling rules that follow a specific pattern.
-
-
-\subsection{General macro handling rules}
-
-The general macro handling rules fall into a few general categories:
-
-\begin{description}
-
-\item[Macro]In its simplest form, this type of rule just tells how many parameters to ignore following the macro. More generally, one may specify the number of parameters a macro takes and how each of these should be handled. Options enclosed in \code{[]} before, between and after parameters are also ignored; this also applies to macros not specified here, so for a macro with no rule, immediately following \code{[]}-options will be ignored. (This type of rule was called an exclude rule in older versions of \TeXcount{}, the reason being that the rule originally only gave the number of
-parameters to ignore following a given macro.)
-
-\item[Environment]For environments enclosed by \code{\bs{begin}\{\textit{name}\}} and \code{\bs{end}\{\textit{name}\}}, there are rules specifying how the contents should be interpreted. A macro rule is added for \code{begin\textit{name}} (without the backslash!) which is \TeXcount{}'s internal representation of \code{\bs{begin}\{\textit{name}\}}. Note that special characters like \code{*} may be part of the environment name, e.g. as in \code{equation*} and rules for these need be specified\footnote{Previously, trailing \code{*} was supposed to be ignored so the same rule would apply to environment \code{equation*} as to \code{equation}. However, due to a bug in a regular expression, this did not work as intended and I have decided not to follow that strategy and instead specify these rules explicitly.}. \emph{Previously, environment rules were referred to as 'group rules', but I have now renamed this both in the \TeXcount{} script and documentation, and replaced \code{group} by \code{envir} wherever appropriate.}
-
-\item[Macroword]This type of rule indicates that the macro itself represents one or more words. Initially, \code{\bs{LaTeX}} and \code{\bs{TeX}} are defined with values 1 indicating that each represents one word.
-
-\item[Preamble]A few macros should be counted even if they are in the preamble. In particular, \code{\bs{title}\{title text\}} is counted as a header assuming it will later be used to produce a title.
-
-\item[Float inclusion]Within floats (environments with the \code{float} parsing rule) there may be texts that should still be counted: in particular captions. These are specified with the float inclusion rule.
-
-\end{description}
-
-Previously, there was also a separate header handling rule, but this is now incorporated into the more general macro handling rules.
-
-A macro parameter is normally on the form \code{\{\textit{something}\}}; more generally it may be anything \TeXcount{} parses as a single unit (or token), e.g. a macro, but since \TeXcount{} parses word by word rather than character by character this may not always be correct if parameters are not \code{\{\}}-enclosed or macros. In addition, some macros take optional parameters which are usually on the form \code{[\textit{option}]}, and \TeXcount{} can also (from version 4) count these.
-
-
-\subsection{Special macro handling rules}
-
-Some macros do not follow the pattern used by \TeXcount{} to represent macro handling rules. For some of these, special handling rules have been hard-coded into the \TeXcount{} script. For some, the macro syntax differs from the general rule, while in other cases the macros may trigger special processing.
-
-\begin{description}
-
-\item[file include]If \code{-inc} is specified, included files will also be parsed and the total presented at the end. Initially, \code{\bs{input}} and \code{\bs{include}} trigger file inclusion, but more file inclusion macros may be added to the \code{\%TeXfileinclude} hash. In addition to potentially triggering file inclusion, the syntax may differ in that \code{input} does not require the file name to be enclosed in \code{\{\ldots\}}.
-
-\item[package include]When packages are included using \code{\bs{usepackage}\{name\}}, \TeXcount{} will check for package specific macro handling rules to include. Initially, only \code{\bs{usepackage}} triggers package inclusion, but more macros may be added to the \code{\%TeXpackageinc} hash.
-
-\end{description}
-
-Complete \LaTeX{} documents should start with a \code{\bs{documentclass}} specification, then a preamble region which should not contain typeset text, before the main document starts with \code{\bs{begin}\{document\}}. However, \LaTeX{} files which are meant to be included into a document will not contain \code{\bs{documentclass}} and \code{\bs{begin}\{document\}}. A rule to recognise the preamble region is hard-coded into \TeXcount{}.
-
-Rules for identifying \code{\$\ldots\$}, \code{\$\$\ldots\$\$}, \code{\bs(\ldots\bs)}, and \code{\bs[\ldots\bs]} as formulae are hard-coded and basically parse until the closing token is encountered.
-
-The macros \code{\bs{def}} and \code{\bs{verb}} have hard-coded rules since these do not follow the pattern for macro handling rules, but may contain \LaTeX{} code which could seriously disrupt the parsing, e.g. by containing unclosed \code{\bs{begin}}. Macros like \code{\bs{newcommand}}, however, are handled by ordinary macro rules.
-
-The macro \code{\bs{bibliography}} is handled to check if the bibliography file should be parsed. The \code{thebibliography} environment is also handled differently, one difference being that a bibliography header is added to the count.
-
-
-\subsection{Package specific macro handling rules}
-
-Starting with version 2.3, \TeXcount{} can handle different sets of macro handling rules for different packages. When a package is included in the \LaTeX{} code or through the \code{-incpackage} option, rules defined for the given package are added.
-
-Note that \TeXcount{} is still doing the analyses sequentially. It is therefore critical that the package inclusion takes place before any use of the package which may make a difference if you are analysing several files. E.g. if the main file contains \code{\bs{input} setup}, any packages included in \code{setup.tex} will not apply to the main file since this is parsed before \TeXcount{} parses \code{setup.tex}.
-
-As of now, the package support is sparse since most macro handling rules have been included in the main set of rules.
-
-
-\subsection{Bibliography handling}
-
-By default, the bibliography is not included in the word count. If the \code{-incbib} option is specified, however, bibliography parsing is turned on. If the bibliography is included from the \code{bbl} file using the \code{\bs{bibliography}} macro, this will be parsed as if included with the \code{-inc} option. If \code{-merge} is specified together with \code{-incbib}, the bibliography will be merged into the document.
-
-Note that bibliography parsing may be non-trivial and depend on the bibliography style used, so the verbose output should be checked: some styles perform considerable formatting which may confuse \TeXcount{}. In addition, initials, page numbers, etc. will all be counted as words, which may result in a word count which is higher than intended.
-
-
-\subsection{Adding or modifying macro handling rules}
-
-There are basically two different ways in which you can add additional macro handling rules, e.g. for your own macros, or modify existing rules: by modifying the \TeXcount{} script, or by adding the rules through \TeXcount{} instructions embedded in the \LaTeX{} code.
-
-The simplest method is to use \TeXcount{} instructions which are embedded in your \LaTeX{} document as \LaTeX{} comments on the format \code{\%TC:\textit{instruction}}. This approach is described in some detail in section \ref{subsec:TC_addrule}.
-
-It is also possible to modify the \TeXcount{} code. The macro handling rules are mostly defined in the hash tables named \code{TeXmacro}, \code{TeXenvir}, etc., and editing these definitions is simple and does not require in-depth knowledge of Perl. A brief overview of the \TeXcount{} code is provided in section \ref{sec:code}.
-
-
-\subsection{Cautions!}
-
-Since the rules are of a relatively general nature, macros that have a great deal of flexibility are hard to deal with. In particular this applies to macros with a variable number of parameters or where the handling of the parameters is not constant.
-
-By default, \TeXcount{} assumes that macro options, i.e. parameters on the form \code{[\ldots]}, should not be counted. From version 4.0, \TeXcount{} allows rules for optional parameters, but in most cases where optional parameters have not been specified in the macro handling rules, they will simply be ignored. There is some risk of misinterpreting text as an option: e.g. \code{\bs{bf}[text]}. This is not likely to be a frequent problem. However, if something like \code{\bs{bf}[a lot of text]} gets ignored because it is considered an option, it can influence the word count substantially. I have therefore been somewhat restrictive with what (and how much) may go into an option. The default restriction on what may be allowed as an option may sometimes be too restrictive, causing \TeXcount{} to interpret options as text or macro parameters; you may use the command line option \code{-relaxed} to relax this restriction and allow more general options.
-
-More advanced macros are not supported and can potentially confuse \TeXcount{}. In particular, if you define macros that contain unbalanced \code{\bs{begin}}--\code{\bs{end}}, this will cause problems as \TeXcount{} needs to keep track of these to know where environments start and end.
-
-
-
-% ---------------------------------------------------------------------------
-
-\section{Output from \TeXcount{}}
-
-\TeXcount{} will by default provide a summary of the word and element counts. This may, however, be modified either by specifying \code{-brief} which reduces it to a one line summary per file, \code{-total} to suppress per file summaries, or by providing an alternative template.
-
-If there are parsing errors, \TeXcount{} will print warnings about these. You may turn off this by specifying \code{-quiet} (\code{-q} for short), but there will still be an added comment about the number of errors in the final statistics to warn you of any errors.
-
-
-\subsection{Count statistics}
-
-The summary output will by default provide a summary of all counts: i.e. word counts for text, header and captions, and the number of floats/tables, headers, inlined and displayed formulae. You may combine these into a summary count by using the \code{-sum} option which by default gives the total number of words and formulae. You may choose briefer output formats by using the \code{-brief} option which produces a one-line summary of the counts. The option \code{-1} is the same as specifying \code{-brief -total} and will give only one line of output for the total only. Combining \code{-brief} with \code{-sum} will cause only the sum to be printed rather than the full set of counts.
-
-If multiple files are processed in one run, \TeXcount{} will by default provide summary statistics per file. If files are included (using the \code{-inc} option), summaries of all files are provided as well as the total. If there is more than one file, i.e. main \LaTeX{} documents provided in the command line, it will also write a total summary.
-
-In order to only write the total summary, use the option \code{-total}. If there is only one file processed, the result will be similar except that subcounts (counts per section etc.) are not provided with the total count.
-
-
-\subsection{Customising the summary output}
-
-You may specify an output template to use instead of the default output formats. This will replace the output per file or for the total with output produced using this template.
-
-The template is a string with codes for inserting the count values and titles. To specify it, use the option \code{-template="\textit{template}"}. The encapsulating \code{"\ldots"} are required if the template contains spaces. You may insert line shifts by using \code{\bs{n}}.
-
-The counts may be included by using the counter keywords: \code{word}, \code{headerword}, etc. Other codes that may be inserted are: \code{\{SUM\}} to insert the count as specified by the \code{-sum} option, \code{\{TITLE\}} for the title (e.g. section name) and a header (same as title unless \TeXcount{} has replaced it), \code{\{ERROR\}} for the number of parsing errors, \code{\{WARNINGS\}} for the number of distinct warnings, or \code{\{NWARNINGS\}} for the total number of warnings. Some of these also have shortened forms\footnote{Previously, one-letter versions of some of these codes were permitted, but that is no longer the case.} like \code{\{ERR\}} and \code{\{WARN\}}.
-
-Conditional inclusion may be performed using the format \code{\{\textit{label}?\ldots?\textit{label}\}} where \code{\textit{label}} is one of the counter keywords, \code{SUM}, \code{ERROR} or \code{TITLE} (or their alternative forms). The enclosed text will then be included only if the corresponding value exists and is non-zero. If you wish to include an alternative text when the value is non-existent or zero, use the format \code{\{\textit{label}?\textit{if non-zero}|\textit{if zero}?\textit{label}\}}.
-
-Subcounts, e.g. per section, may be included by using \code{\{SUB|\textit{template}|SUB\}} with a separate template text specified for the subcounts. This will only be included if there is more than one subcount, and in order to conditionally include prefix and suffix you may use \code{\{SUB?\textit{prefix}|\textit{template}|\textit{suffix}?SUB\}}.
-
-Note that you have to insert line shifts yourself. \TeXcount{} will only insert one line shift after each file count, and not after the total count: if you process only one file and want only to output the total sum without a line shift at the end, use \code{-sum -total -template="\{SUM\}"}, which should give the same output as \code{-1 -sum} when there are no parsing errors.
-
-
-
-% ---------------------------------------------------------------------------
-
-\section{\TeXcount{} instructions in the \LaTeX{} document}
-
-It is possible to give some instructions to \TeXcount{} from within the
-\LaTeX{} document. These can be used to control the parsing of the document and add custom made macro and environment handling rules directly from the \LaTeX{} document. The general format of these instructions is
-\codeline{\%TC:\textit{instruction \alt{parameters}}}
-which \LaTeX{} will interpret as a comment but \TeXcount{} will detect.
-
-Adding your own macro handling rules is relatively simple. While it is fairly easy to edit the script to add more rules, this has the disadvantage that the modifications will be lost if updating to a new version of \TeXcount. A better and more flexible solution is to include instructions to \TeXcount{} in the \LaTeX{} documents, alternatively to make a definition file in which new macro handling rules are defined. The \TeXcount{} instructions for doing this take the form
-\codeline{\%TC:\textit{instruction name parameters \alt{option}}}
-where \code{name} is either the macro name (including backslash) or environment name:
-%
-\input{sub_addrules}
-
-Note that macro handling rules are added successively throughout the session: i.e. if more files are parsed, handling rules from previously parsed files still apply. This has advantages as well as disadvantages. If you give a list of files with the rules specified in the first file, these rules will be applied to all the documents. However, if you use the \code{-inc} option, included files will be parsed only after \TeXcount{} has finished parsing the file in which they are included, so any rules specified in these will not apply to the initial document.
-
-A few additional \TeXcount{} instructions exist to control the overall parsing and counting:
-
-\input{sub_tc_other}
-
-
-\subsection{Parameter and content handling rules}\label{subsec:TC_addrule}
-
-There are a set of alternative rules that may be used for parsing macro parameters and environment contents. These rules, or \emph{parser states}, are identified by keywords:
-%
-\begin{description}
-\def\option#1[#2]#3{\item[#1:] (key: \code{#3} formerly \code{#2})}
-\option{Text}[1]{text, word, wd, w}
- Count as text (i.e. count words).
-\option{Header text}[2]{headertext, headerword, hword, hwd, hw}
- Count as header text.
-\option{Other text}[3]{otherword, other, oword, owd, ow}
- Count as float/caption text.
-\option{Displaymath}[7]{displaymath, dsmath, dmath, ds}
- Count as displayed math formulae.
-\option{Inline math}[6]{inlinemath, inline, imath, eq}
- Count as inlined math formulae.
-\option{To header}[4]{header, heading, head}
- Count header, then count text as \code{headertext} (transition state).
-\option{To float}[5]{float, table, figure}
- Count float, then parse contents as \code{isfloat} (transition state).
-\option{Preamble}[-9]{}
- Parse as preamble, i.e. ignore text but look for \code{preambleinclude} macros.
-\option{Ignore}[0]{ignore}
- Ignore text, i.e. do not count, but will still parse the code.
-\option{Float}[-1]{isfloat}
- Float contents, ignore text but look for \code{floatinclude} macros.
-\option{Strong exclude}[-2]{xx}
- Strong ignore which ignores environments, e.g. to use in macro definitions where
- \code{\bs{begin}}--\code{\bs{end}} need not be balanced.
-\option{Stronger exclude}[-3]{xxx}
- Stronger ignore, handles macros as isolated tokens without handling their parameters,
- to use with macro definitions like \code{\bs{newcommand}} and \code{\bs{def}}.
-\option{Exclude all}[-4]{xall}
- Ignore all, including unbalanced braces (e.g. used by \code{\%TC:ignore} and the \code{verbatim} environment). This rule may be used for environment contents, but not for macro or environment parameters or options since the exclusion causes \{ and [ to be ignored.
-\end{description}
-
-The keys are used to identify the rule. Environment content rules are simply specified by giving the desired key. Parameter rules are on the form \code{[\textit{rule},\textit{rule},\ldots]} with one rule provided per parameter, where each rule is either one of the above keywords or \code{option:\textit{rule}} to indicate an optional parameter on the form \code{[\ldots]}; alternatively, a single integer can be provided (not enclosed in \code{[]}) to indicate that the indicated number of parameters should be ignored.
-
-The list of parser states is in order of increasing priority: i.e. when a parser state is specified as a rule for parsing a parameter or environment content, this will only take effect if it has higher priority than the current state. Thus, text within an ignored or excluded region will not be counted.
-
-The formerly used numeric codes are listed at the end of the keyword list for each state. Before version 4, these numeric codes were used to specify parsing rules, but although these should still work, using the named keywords is highly recommended.
-
-The transitional states indicate that one of the counters is incremented, followed by a change to another state: e.g. if the \code{header} rule is specified, this will first cause the header counter to be incremented, and then a change to the \code{headerword} state in which word counts are added to the header word counter.
-
-\subsubsection{Adding a macro handling rule}
-
-The \TeXcount{} instruction for adding (or changing) a rule for how \TeXcount{} handles a specific macro takes the form
-\codeline{\%TC:macro \textit{macro-name parameter-rules}}
-where the macro name includes the backslash and the parameter rules can be an integer or a \code{[]}-enclosed list as explained above.
-
-If a list, \code{[\textit{rule},\textit{rule},\ldots]}, of parsing rules is provided, the macro will be assumed to take this number of parameters. Each rule is either a keyword signifying the rule, or parser state, with which the parameter will be parsed, or \code{option:\textit{key}} for optional \code{[]}-enclosed parameters. Additional \code{[]}-enclosed options, between or after the macro and the parameters, will be ignored.
-
-Macro handling rules specified for macros \code{\bs{\textit{name}}} automatically apply to \code{\bs{\textit{name}*}}: i.e. a \code{*} is automatically gobbled up as a macro modifier. 
-
-Here are some examples together with corresponding macro definitions:
-
-\begin{lstlisting}
-%TC:macro \refnote[text,othertext]
-\newcommand\refnote[2]{\textit{#1}\footnote{#2}}
-
-%TC:macro \newsection [header,ignore]
-\newcommand\newsection[2]{\section{#1}\label{sec:#2}}
-
-%TC:macro \NB 1
-\newcommand\NB[1]{\marginpar{#1}}
-\end{lstlisting}
-
-The predefined rules can easily be read off the script file: they are hash maps defined at the beginning of the script with names \code{TeXmacro}, \code{TeXenvir}, etc.
-
-\subsubsection{Adding an environment handling rule}
-
-Rules for environments may be added on the format
-\codeline{\%TC:envir \parm{name} \parm{parameter-rules} \parm{content-rule}}
-for parsing \code{\bs{begin}\{name\}\ldots\bs{end}\{name\}}. The parameter rules are specified as for the \code{macro} rule and are used to process the parameters that follow \code{\bs{begin}\{name\}}. The content rule is a single parsing rule to use on the environment content.
-
-\begin{lstlisting}
-%TC:envir theorem [] text
-\newtheorem{theorem}{Theorem}
-\end{lstlisting}
-
-\subsubsection{Adding rules that apply to the preamble and float contents}
-
-Within the preamble (from \code{\bs{documentclass}} to \code{\bs{begin}\{document\}}) and within floating objects (tables, figures, etc. parsed using the \code{float}/\code{isfloat} states), texts and macros are generally ignored. However, it is possible to specify particular macro handling rules that apply within these regions by using the \code{preambleinclude} and \code{floatinclude} \TeXcount{} instructions. These take the same format as the \code{macro} instruction:
-\codeline{\%TC:preambleinclude \textit{macro-name parameter-rules}}
-\codeline{\%TC:floatinclude \textit{macro-name parameter-rules}}
-It is possible for the same macro to specify different rules for preamble, floats and general use, although for most uses these should be expected to be the same.
-
-Preamble inclusion is typically used for macros like \code{\bs{title}} that define text that should be counted although it may be placed in the preamble. Another use is that macros that may occur in the preamble and may contain unbalanced \code{\bs{begin}}--\code{\bs{end}} pairs, like \code{\bs{newcommand}}, require a stronger exclusion than the regular \code{ignore} rule even in the preamble to ensure \TeXcount{} is not confused by these.
-
-Float inclusion is used e.g. for captions, and the parsing rules should normally be to count texts using the \code{otherword} parsing rule.
-
-
-\subsection{Count macro, either as words or in other counters}
-
-Some macros, e.g. \code{\bs{LaTeX}}, generate words and should be counted as words. Other macros can generate other elements, e.g. headers or figures. Rules for counting macros can be specified as
-\codeline{\%TC:macroword \textit{macro} \textit{number}}
-where the parameter is the number of words produced by the macro, or
-\codeline{\%TC:macroword \textit{macro} [\textit{countername},\ldots]}
-which causes each of the counters in the list to be incremented by one (or more if listed multiple times).
-
-The counters for counting the number of files, text words, etc. are stored in an array. In some cases, e.g. when \code{-sum=} is specified, the order of the counters in this array is used to specify the rule. However, in most cases the counters should be specified by keywords. The counters, their index number and keywords are:
-%
-\begin{description}
-\def\option[#1]#2{\item[#1.] (keys: \code{#2})}
-\option[0]{file} Number of files.
-\option[1]{text, word, wd, w} Number of words in text.
-\option[2]{headerword, hword, hwd, hw} Number of words in headers.
-\option[3]{otherword, oword, owd, ow} Words outside text, e.g. in floats/tables/figures.
-\option[4]{header, heading, head} Number of headers.
-\option[5]{float, table, figure} Number of floating environments, e.g. tables and figures.
-\option[6]{inlinemath, inline, imath, eq} Number of inlined mathematics formulae.
-\option[7]{displaymath, dsmath, dmath, ds} Number of displayed equations.
-\end{description}
-
-Examples of uses:
-
-\begin{lstlisting}
-%TC:macroword \TeXcount 1
-\newcommand\TeXcount{{\TeX}count}
-
-%TC:macroword \acknowledge [header,hword]
-\newcommand\acknowledge{\section*{Acknowledgements}}
-\end{lstlisting}
-
-
-\subsection{Specifying file inclusion macros}
-
-In addition to \code{\bs{input}} and \code{\bs{include}}, which are the standard \LaTeX{} macros for file inclusion, there are packages such as \code{import} intended to enable organising files into subfolders. \TeXcount{}, from version~3, adds some support for macros that change the path from which files are included. If the user needs to add additional file inclusion macros, the format is
-\codeline{\%TC:fileinclude \parm{macro} \parm{file-parameters}}
-where the file parameters are a comma separated list of keywords, each corresponding to a macro parameter. Available parameters are:
-%
-\begin{description}
-
-\item[\code{input}:] This is a special keyword to use with \code{\bs{input}}. The handling of the parameter values is as \code{file}, but the parameter itself is not required to be enclosed in \code{\{\}}.
-
-\item[\code{file}:] This parameter simply gives the name of or path to a file. If the file is not found, \TeXcount{} will append \code{.tex} and try again.
-
-\item[\code{texfile}:] This parameter gives the name of or path to a file, but \code{.tex} will be appended, and is the rule used by \code{\bs{include}}.
-
-\item[\code{dir}:] This parameter provides the path of a directory relative to the \code{\$workdir}, and adds this to the search path before including any files. This is used with the \code{\bs{import}} macro of the \code{import} package.
-
-\item[\code{subdir}:] This parameter provides the path of a directory relative to the current directory, and adds this to the search path before including any files. This is used with the \code{\bs{subimport}} macro of the \code{import} package.
-
-\item[\code{<bbl>}:] This is a special keyword to use with \code{\bs{bibliography}} to specify inclusion of the bibliography file. It is different from the other keywords in that it does not take a macro parameter.
-
-\end{description}
-
-Examples showing how some existing macros, from basic \LaTeX{} and from the \code{import} package, are defined:
-
-\begin{lstlisting}
-%TC:fileinclude \input input
-\input macros.tex
-%TC:fileinclude \include texfile
-\include{intro}
-%TC:fileinclude \import dir,file
-\import{supplements/}{overview.tex}
-%TC:fileinclude \subimport subdir,file
-\subimport{tables/}{data.tex}
-\end{lstlisting}
-
-
-\subsection{Adding subcount break points}
-
-By specifying \code{-sub}, \TeXcount{} can produce subcounts, e.g. per section. Alternatively, or in addition, explicit break points can be entered in the \LaTeX{} document using the TC-instruction \code{break}. These take the form:
-\codeline{\%TC:break \textit{title}}
-A title (or name) may be given to identify the break point.
-
-If you define new section macros or macros you wish to cause a break point, these may be specified using the TC-instruction \code{breakmacro}:
-\codeline{\%TC:breakmacro \textit{macro} \textit{label}}
-This defines the given macro to cause a break point, and uses the given label to indicate the type of break (e.g. Section, Chapter, etc.).
-
-
-\subsection{Ignoring segments of the file}
-
-The TC-instruction \code{ignore}, later canceled by \code{endignore}, may be used to turn off all counting in a segment of the \LaTeX{} file. The ignored segment should thus be started by
-\codeline{\%TC:ignore}
-and ended by
-\codeline{\%TC:endignore}
-causing all text in between to be ignored.\footnote{In older versions, \TeXcount{} would still parse this text and might thus be affected by unbalanced braces. As of version 2.3, however, this should be fixed to make the ignore instruction more robust.}
-
-
-\subsection{Bibliography inclusion}
-
-In order to include the bibliography in the word counts, you can either specify \code{-incbib} on the command line, or use the \TeXcount{} instruction
-\codeline{\%TC:incbib}
-which has the same effect: it specifies handling rules for the \code{\bs{bibliography}} macro and \code{thebibliography} environment that causes the bibliography to be included in the count, and if necessary the \code{.bbl} file to be included (without requiring \code{-inc} or \code{-merge}).
-
-
-\subsection{Text substitution prior to parsing}
-
-There are cases where a macro needs to be substituted with a text prior to parsing. One such case is when a macro contains a file path which is later used by a file inclusion macro. Since \TeXcount{} does not actually expand the macros, it will not be able to generate the file path from the macro. Instead, one may perform an explicit substitution
-\codeline{\%TC:subst \parm{macro} \parm{text}}
-which will then cause all occurrences of the macro to be substituted by the provided text prior to parsing. Note that this substitution will therefore also be found in the verbose output. 
-
-\begin{lstlisting}
-\newcommand\chappath{chapters}
-%TC:subst \chappath chapters
-\input \chappath/chapter1
-\end{lstlisting}
-
-Note that the substitution is placed \emph{after} the \code{\bs{newcommand}} definition. Otherwise, the substitution would have taken effect, changing that line to \code{\bs{newcommand} chapters/chapters}.
-
-
-
-\subsection{Adding a new counter}
-
-Initially, \TeXcount{} has eight different counters: file, text words, header words, other words, number of headers, number of floating objects, number of inlined formulae, and number of displayed formulae. However, it is possible to add more counters, e.g. to count footnotes separately. The syntax is
-\codeline{\%TC:newcounter \parm{name} \opt{description}}
-where the given name is used as keyword to refer to this counter. If no description is provided, the name will be used as description. A new counter is then added, and a parsing rule (parser state) with the same name is added which may be used in specifying macro and environment handling rules.
-
-The following example shows how two different counters are added: one to count the number of footnotes, and another to count the words in footnotes.
-
-\begin{lstlisting}
-%TC:newcounter fwords Words in footnotes
-%TC:newcounter footnote Number of footnotes
-%TC:macro \footnote [fwords]
-%TC:macroword \footnote [footnote]
-Each footnote\footnote{Words in footnotes will be counted separately.} will be counted.
-\end{lstlisting}
-
-Note that we have to specify one rule for counting the words in footnotes, and another rule for counting the footnotes. Unlike headers and floating bodies, there are no transition states available that can do both.
-
-
-% ---------------------------------------------------------------------------
-
-\section{Using an option file}
-
-If you have a lot of settings, e.g. output template and TC commands for specifying parsing rules, you may place these into a file and include this using \code{-opt=\textit{file}}.
-
-The format of this file is quite simple: each line is read as one option, so different options should not be placed on the same line. If some options are so long you need to break the line, e.g. for specifying an output template, you can do so by placing \code{\bs{}} at the start of lines that continue the previous line.
-
-You may enter TC commands much as in the \LaTeX{} code, but starting the line with just \code{\%} instead of \code{\%TC:}. Using these, you may include specifications of parsing rules.
-
-Blank lines and lines starting with \code{\#} are ignored and may thus be used to add comments to the option file. So are leading spaces, which allows lines to be indented. Line breaks may be inserted by \code{\bs{n}}.
-
-Here is an example which sets the total sum to be the number of words (not including formulae), subcounts by section, parses included files, and adds an output template.
-
-\begin{lstlisting}[frame=single]
-### Options to use with TeXcount
-
-# Counting options
--sum=1,1,1
--sub=section
--inc
-
-# Macro rules
-%macro \url 1
-%envir sourcecode 0 0
-%macroword \TeXcount 1
-
-# Path used in file inclusion (\chapterpath filename)
-%subst \chapterpath chap/
-
-# Output template
--template=
-   \::: {title} :::\n
-   \Words: {sum}\n
-   \Formulae: {6} + {7}\n
-   \{5?Number of floats: {5}\n?5}
-   \{SUB? - {sum} words in {title}\n?SUB}
-\end{lstlisting}
-
-
-
-% ---------------------------------------------------------------------------
-
-\section{Customising \TeXcount{}}
-
-\TeXcount{} is a self-contained Perl script: no external packages or resources are required except that you need to have Perl installed to run it. Unfortunately, as with much Perl code, since Perl does not itself encourage structured programming, after expanding somewhat in size, it is not the most readable of codes. However, there may still be cases where you might yourself want to modify the code.
-
-There are some things that may be modified quite easily even without knowing Perl.
-
-\begin{description}
-
-\item[Preset startup options]On one of the first lines of the code, the list \code{@StartupOptions} is defined. A list is simply a sequence of values (an array) on the form \code{(\textit{value},\textit{value},\ldots)}. As it stands, this list is empty, but you may add startup options to be included prior to command line options when you run \TeXcount{}. E.g. if you change this to \code{("-inc")} it will automatically add the \code{-inc} option so you don't have to do that yourself every time you run \TeXcount{}.
-
-\item[Adding macro handling rules]While you may add macro handling rules using \code{\%TC:} commands either in the document or in a separate option file, this is inconvenient for large numbers of macros or if you want these rules always to be included. Also, you might want to add such rules for specific packages. In either case, it might be practical to add these directly to the \TeXcount{} code. \TeXcount{} stores the rules in hashes (maps from a key to a value) named \code{\%TeXmacro}, \code{\%TeXenvir}, etc. There is more documentation on each of these in the code itself, and you may also inspect how rules have been defined for other macros and environments.
-
-\item[Output style]The ANSI colour codes for different levels of verbosity are encoded in the \code{\%STYLES} hashes and may be changed. The HTML style is encoded in the method \code{html_head()} and is easily modified. 
-
-\item[Character and word definitions]\TeXcount{} identifies words as those that match one of a given set of regular expressions (defined in \code{@WordPatterns}). Note that \code{@WordPatterns} is changed by options \code{-chinese}, \code{-japanese} and  \code{-letters}. The pattern that is used within the word patterns to recognise letters is stored in \code{\$LetterPattern}. This is replaced if the \code{-relaxed} or \code{-restricted} option is set. Changing these definitions may be useful if you have special characters or wish to define words differently.
-
-\end{description}
-
-
-% ---------------------------------------------------------------------------
-
-\section{Modifying the \TeXcount{} script}\label{sec:code}
-
-\TeXcount{} is written in Perl, and although hardly the best structured and documented code ever seen, I have tried to structure and document it somewhat. In particular, some parts of the code should be easily modifiable even without in-depth knowledge of Perl or the \TeXcount{} script: e.g. the macro handling rules.
-
-For more aid on how the \TeXcount{} script is coded and organised, please consult the Technical Documentation. However, here is a very brief overview:
-
-\begin{description}
-
-\item[Header and imports:] The shebang (\code{\#!}) and package imports (\code{use \parm{package}}).
-
-\item[Global variables (and some methods related to these):] This defines and initialises global variables related to option settings, state variables used in parsing and counting (including functions for interpreting these), variables and hashes for storing macro handling rules, and character class definitions (must be defined before use).
-
-\item[Main program:] This simply contains a call to the \code{MAIN} routine with the command line arguments.
-
-\item[Routines/functions/procedures:] The first procedure defined is \code{MAIN} which contains the program flow, then follows other subroutines. Routines with capitalised initial letters indicate high-level routines, while routines starting with underscores (\code{_}) are low-level routines.
-
-\item[Text data:] At the end of the file is a \code{__DATA__} region containing text data used by the help routines.
-
-\end{description}
-
-Perl will first process the setup section which defines global variables, arrays and hashes. It then executes the main section (consisting of the call to \code{MAIN}), whereafter it exits. The subroutines and text data follow after the \code{exit}.
-
-
-
-% ---------------------------------------------------------------------------
-
-\section{License}
-
-The \TeXcount{} package---script and accompanying documents---is distributed
-under the \LaTeX{} Project Public License (LPPL)
-\codeline{\url{http://www.latex-project.org/lppl.txt}}
-which grants you, the user, the right to use, modify and distribute
-the script. However, if the script is modified, you must change its
-name or use other technical means to avoid confusion with the original script.
-
-
-\end{document}

Deleted: trunk/Master/texmf-dist/doc/support/texcount/TechDoc.pdf
===================================================================
(Binary files differ)

Deleted: trunk/Master/texmf-dist/doc/support/texcount/TechDoc.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/TechDoc.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/TechDoc.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,620 +0,0 @@
-\documentclass{article}
-\usepackage[T1]{fontenc}
-\usepackage{a4wide}
-\usepackage{times}
-\usepackage{listings}
-\usepackage{url}
-\usepackage{color}
-
-\include{macros}
-\newcommand\Obj[1]{\textsl{#1}}
-\newcommand\wild{\ldots}
-\newcommand\eqsim{$=\sim$}
-
-% CGI exclusion macro
-%\def\CGI#1\CGIend{} % To exclude CGI text
-\def\CGI#1\CGIend{#1} % To include CGI text
-\def\CGIend{}
-
-%%% Title
-\title{%
-\LARGE \TeXcount\\
-\Large Technical documentation\\
-\Large Version \version\copyrightfootnote
-}
-\author{Einar Andreas R{\o}dland}
-
-\sloppy
-
-\begin{document}
-
-\maketitle
-
-{\abstract%
-The aim of this document is to explain the implementation details of \TeXcount{} with the aim of aiding anyone who wishes to modify the Perl code (including the author). To be of practical use, it will require some knowledge of Perl and familiarity with \TeXcount{}: while full fledged development of \TeXcount{} requires a working knowledge of Perl, code modification may often be done with only limited experience with Perl.
-}
-
-{\scriptsize\tableofcontents}
-
-\pagebreak
-
-
-
-\section{Introduction}
-
-
-\subsection{\TeXcount{} versioning}
-
-The version number is on the form \code{\textit{major}.\textit{version}.\textit{subversion}.\textit{build}}. Main releases contain only the first two terms, implying that subversion and build number are both zero. Minor releases only contain the first three terms. Main as well as minor releases should be functional, tested versions. The subversion number can also be \code{alpha} ($\alpha=-2$) or \code{beta} ($\beta=-1$) for which the testing has been limited. The build number is used to keep track of changes and versions during development: they may be made available, but are purely for testing.
-
-
-\subsection{Some things you need to know about Perl}
-
-\TeXcount{} is written in Perl. The entire script, including macro rules and help texts, is contained in one file. This makes the file somewhat big, and modularisation of the code is therefore not strictly enforced. I have however tried to structure the code somewhat.
-
-Perl has a few built-in data structures which are referenced in somewhat different manners. In this document, it will be important to recognize the difference between three different types of data: regular variables, arrays and hash maps.
-
-\begin{description}
-
-\item[\code{\$\textit{name}=\textit{value}}:] The \code{\$} at the start indicates that it is a Perl variable. The value can be numbers or strings, or it can be a reference which points to another data object (e.g. array or hash map).
-
-\item[\code{@\textit{name}=(\textit{value},\ldots)}:] The \code{@} at the start indicates that this is an array. The positions are indexed from 0 to $\textrm{length}-1$.
-
-\item[\code{\%\textit{name}=(\textit{key}=>\textit{value},\ldots)}:] The \code{\%} at the start indicates that this is a hash map that maps keys to values. The key will usually be a string, but can also be a number.
-
-\item[\code{sub \textit{name}} \{\ldots\}:] This defines a subroutine: function or procedure. Normally, these can be defined anywhere in the script, and I have generally placed them after the main program.
-
-\end{description}
-
-Note that \code{(\textit{value},\ldots)} is a list of values, not an array or a hash: it simply produces a list of values that are used to fill the defined array or hash. Arrays and hashes can also be produced directly by \code{[\textit{value},\ldots]} or \code{\{\textit{key}=>\textit{value},\ldots\}} respectively. Both these return a reference to the array/hash rather than the array/hash itself.
-
-In much of the code, hashes are passed by reference: e.g. if \code{\%hash} is a hash, \code{\$href=\bs{\%hash}} stores a reference to the hash, where the leading \code{\bs{}} causes a reference to be returned rather than the hash itself. Retrieving a value from the hash is done by \code{\$hash\{\textit{key}\}} or \code{\$href->\{\textit{key}\}} if the hash is accessed by reference. Note that individual values in arrays and hashes are prefixed by \code{\$}: i.e. \code{\$\textit{array}[\textit{index}]} and \code{\$\textit{hash}\{\textit{key}\}}.
-
-\TeXcount{} makes extensive use of regular expressions (regex): expressions on the form \code{\$\textit{string}\eqsim/\textit{pattern}/} and \code{\$\textit{string}\eqsim s/\textit{pattern}/\textit{replace}/}. In \TeXcount{}, the main use is to recognize (and remove) tokens (words, macros, spaces, etc.) at the start of a string of \LaTeX{} code. Some of these may be fairly simple to understand, while others may be more complex.
-
-
-
-\section{Overview}
-
-
-\subsection{Code structure}
-
-\TeXcount{} is written in Perl, and although hardly the best structured and documented code ever seen, I have tried to structure and document it somewhat. In particular, some parts of the code have been written with modifications in mind so that users can make their own changes without in-depth knowledge of Perl or the \TeXcount{} script.
-
-Here's a quick walk-through of the code structure and comments on how easily the code may be modified. Some parts of the code are marked as \emph{CMD specific}. There are two versions of the script: the CMD version intended for command line use, and the CGI version used with the web interface. The one you have is the CMD version.
-
-\begin{description}
-
-\item[\em HEADER AND IMPORTS:] \textit{The shebang (\code{\#!}) and \code{use} imports.}
-
-\item[\em INITIAL SETUP:] \textit{These set up global variables prior to execution.}
-
-\item[Settings and setup variables:] The start of the script sets up initial settings and variables. Many of these may be modified by command line options, but if you want to change the default behaviour these may be changed. However, note that there is a list \code{@StartupOptions} intended for this: initially, it is empty, but this is probably the simplest place to change startup options.
-
-\item[Internal states:] As of version 2.3, internal state identifiers (which are numerical codes) have been defined as \code{STATE}, \code{TOKEN} and \code{CNT} variables, and these are also defined here. A few subroutines for interpreting these states have been included here, although most subroutines are defined after the main code, since they are intimately tied to the state's numerical values. None of these are intended to be modified.
-
-\item[Styles:] The style definitions basically define which elements to print for each of the verbosity levels. These map element names to ANSI colour codes. When used with HTML, the element names are used as tag classes. If you wish to change the ANSI colour scheme, or change which elements are written in each verbosity option, these may be changed.
-
-\item[Word pattern definitions:] This section contains regular expression patterns for identifying words and macro options. In addition, the additional character classes defined by \TeXcount{} are defined here. If you have special needs or wishes, modifying these definitions may be an option.
-
-\item[\TeXcount{} parsing rules:] This is the section in which the main rules for interpreting the \LaTeX{} code are specified: the exception is a few hard-coded rules that do not follow these general patterns. These are hashes that map the macro or environment name to the macro handling rules. First, the default rules are defined, then package specific rules are defined.
-
-\item[\em MAIN:] \textit{This is the top-level code which gets executed. All else is done through calls to subroutines.}
-
-\item[Main \TeXcount{} code:] This is the main code that is run. It is very simple: just a call to the method \code{MAIN} passing the command line options.
-
-\item[\em SUBROUTINES:] \textit{The subroutines are organised into blocks. Subroutine names use capital letters or initials if they are main routines (like public in other languages) to be used at the top-level, lower case if they may be used throughout but are considered to be lower-level subroutines, prefixed by one or two underscores (_) if used only within the block.}
-
-\item[Main routines:] The \code{MAIN} routine gives the general processing flow. This in turn calls routines to parse the command line options, process/apply the options, parse the \TeX/\LaTeX{} files, and finally summarise the final results. The main routines are CMD specific.
-
-\item[CMD specific subroutines:] These are subroutine versions that are CMD specific, e.g. file inclusion and ANSI colours. Their location is somewhat illogical: logically, they might belong later together with related subroutines, but have been placed this early because they are specific to the CMD (or CGI) version.
-
-\item[Option handling:] After parsing the options, the option values are processed using these subroutines. Some of the option handling operations call on global variables, whereas some are more hard-coded. Like the global variables, if you have special wishes or needs, there may be parts here that can be modified quite easily to change default settings or effects of specific options.
-
-\item[\TeX{} object:] The main role of the \code{TeX} object (which is technically not an object in the ordinary sense but just a hash) is to be a container object which links to the \TeX/\LaTeX{} code, the word count object, etc. The \code{TeX} object pertaining to any parsed \TeX/\LaTeX{} file is passed along from subroutine to subroutine, usually called \code{\$tex}. The \code{Main} object produced by \code{getMain} is a simple substitute for the \code{TeX} object for use when none is available, e.g. to catch errors not specific to any particular \code{TeX} object. 
-
-\item[File reading routines:] These are used to read files and STDIN.
-
-\item[Parsing routines:] These contain the main routines for parsing the \TeX/\LaTeX{} code. The main worker method is the \code{_{}parse_{}unit} which parses a block of code: the \emph{unit}. A unit of code may be the contents of an environment, a \code{\{\ldots\}} group, a macro option or parameter, etc. The parsing of one unit is determined by the parsing state, which is passed to the parsing method, and the end marker which indicates which token marks the end of the unit. Different subroutines are then used to process the different types of code: macros, environments, TC instructions, etc. Amongst these routines are also routines for converting the parsed code into tokens, which is done one token at a time which is then removed from the start of the code.
-
-\item[Count object and routines:] The count object contains the counters as an array, plus titles and labels; in addition it can contain a list of subcounts which are themselves count objects. The count object is used for each file, but also to summarise multiple files, and region counts within files (e.g. per section). The \code{TeX} object contains an active count object to which newly counted words, equations, etc. get added. However, each \code{TeX} object also has a summary count object which will contain the final sum. 
-
-\item[Output routines:] First, there are some routines for general output, i.e. independent of specific \code{TeX} objects. There are then some routines for formatting output, e.g. for the verbose output. There are also routines for printing count summaries in various formats. A special set of routines exist for printing the verbose output itself, and some of these are also involved in the parsing.
-
-\item[Help functions:] These routines are used to print help.
-
-\item[HTML functions:] These are routines for producing HTML output. In particular, the HTML style is defined here and may be easily modified.
-
-\item[Text data:] Some texts are not hard-coded into the script, but added as text data at the end. There are some routines defined to handle the text data, and then the text data itself.
-
-\end{description}
-
-Perl will first process the setup section which defines global variables, arrays and hashes. It then executes the main section (consisting of the call to \code{MAIN}), whereafter it exits. The subroutines and text data follow after the \code{exit}.
-
-\CGI
-There is a separate CGI version of the \TeXcount{} script for the web service. While this is mostly the same as the regular command line version, there are some differences in how options are set and \LaTeX{} documents are read. Occasional differences in the CGI version will be commented on, but the main emphasis will be on the command line version of \TeXcount{}.
-\CGIend
-
-
-\subsection{Global variables}
-
-A number of globally defined variables, including constants, arrays and hashes, are defined at the start of the program. These fall into a few different categories.
-
-There are a number of variables defined for storing options and settings, many of which can be modified by command line options. In addition, there are a few variables for global summaries and statistics, as well as a few for internal states during parsing.
-
-Global constants are defined to represent different states and counters. One set of constants, \code{\$CNT_\wild}, specify the position of the different counters in the counting array; parser states are defined as \code{\$STATE_\wild}; token types are named \code{\$TOKEN_\wild}. For example, the parsing state \code{\$STATE_TEXT} indicates that a block of \LaTeX{} code should be parsed and have words counted as text words. The constants simply take numerical values, but help make the code more readable. Together with some of these are defined functions for interpreting or transforming these constants.
-
-Alternative settings for different options are defined in a number of hashes, e.g. \code{\%STYLES} indicating which tokens to print at different levels of verbosity, and \code{\%NamedLetterPattern} which stores alternative regex rules which may be used to recognize letters.
-
-A special set of global settings are the macro handling rules that are stored in a number of hashes: \code{\%TeXmacro}, \code{\%TeXenvir}, etc. as well as similar sets of hashes for package specific rules.
-
-
-\subsection{\TeXcount{} objects}
-
-First, note that what is referred to as objects here are just hash maps with a predefined set of values. However, these serve the same purposes as objects. There are no explicit class specifications defining these, just a set of functions returning hashes that contain the required keys, some of which may even be optional. Still, it is useful to think of them as objects, and their main purpose is to encapsulate data so that they can conveniently be passed around.
-
-\begin{description}
-
-\item[The \Obj{Main} object]
-Each \TeXcount{} session instantiates a singleton \Obj{Main} object. This is used as a replacement when no \Obj{TeXcode} object is available for capturing (counting and storing) error messages and warnings.
-
-\item[The \Obj{TeXcode} object]
-The \Obj{TeXcode} object encapsulates the \LaTeX{} code that is to be parsed as well as counts and lists of reported errors. In the code, it is generally referred to using the \code{\$tex} variable.
-
-\item[The \Obj{count} object]
-The \Obj{count} object is primarily a container for the array of counts: i.e. the array containing word counts and counts of headers, equations, etc. However, it also keeps track of subcounts from contained files, sections, etc.
-
-\end{description}
-
-A more detailed explanation of the different objects is provided in section \ref{sec:objects}.
-
-
-\subsection{Main program flow}
-
-The main program consists of a single call to the procedure \code{MAIN}. This does the following:
-
-\begin{description}
-
-\item[\code{Initialise}:]
-Most of the initialisation is done when defining the global variables, but some initialisation requires code execution: e.g. OS specific initialisation.
-
-\item[\code{Check_Arguments}:]
-Runs an initial check of the command line arguments passed to \TeXcount{}, e.g. for \code{-help}, and may exit \TeXcount{}.
-
-\item[\code{Parse_Arguments}:]
-Parses the command line arguments, setting option variables, and returns the list of \LaTeX{} files to be parsed.
-
-\item[\code{Apply_Options}:]
-This applies the options set either in the initial setup or initialisation, or when parsing the arguments. While most options are set directly during the argument parsing, settings that may depend on multiple options or options that should be applied only once, e.g. initialising the output and writing the HTML header, are applied here.
-
-\item[Parse files (or write help or error message):]
-The file parsing calls \code{Parse_file_list} with the list of files to be parsed, and this returns the total count object. Apart from this, help, summary output and error reports are produced if required.
-
-\item[\code{Close_Output}:] This just makes sure the output channel is properly closed, e.g. writing closing HTML code.
-
-\end{description}
-
-\CGI
-In the CGI version of \TeXcount{}, \code{Initialise}, \code{Check_Arguments} and \code{Parse_Arguments} are replaced by a single call to \code{Set_Options}. Also, since the CGI version only processes one file, alternatives for parsing and reporting on multiple files are not required and are instead replaced by a single call of \code{parse}.
-\CGIend
-
-
-\subsection{How \TeXcount{} processes \LaTeX{} documents}
-
-The \code{parse} routine is the entry point for parsing \LaTeX{} code of a single file. It takes a \Obj{TeXcode} object, the container object of a \LaTeX{} document and its corresponding \Obj{count} object, and performs the parsing of the entire document. The counts are stored in the counter in the \Obj{TeXcode} object.
-
-The hierarchy of delegation from \code{MAIN} down to \code{parse} is as follows:
-
-\begin{description}
-
-\item[\code{MAIN}] calls \code{Parse_file_list} with a list of files, which returns the total count (a count object) for \code{MAIN} to report.
-
-\item[\code{Parse_file_list}] calls \code{parse_file} for each file in the provided file list, and for STDIN (identified by \code{\$_STDIN_}) if the option to parse standard input has been set. It then aggregates the counts returned by \code{parse_file} into a total count which it returns.
-
-\item[\code{parse_file}] calls \code{_add_file}, first for the main file, and then again for each included file if file inclusion (\code{-inc}) has been set. The aggregation of counts is done by \code{_add_file} into a total count object provided by \code{parse_file}, and this total count object is then returned by \code{parse_file} upon completing the parsing of the main file as well as all included files.
-
-\item[\code{_add_file}] reads the file into memory, creates a \Obj{TeXcode} object which encapsulates the \LaTeX{} code and the counts, and calls \code{parse} to perform the parsing of this \Obj{TeXcode} object. The counts are added directly into the \Obj{TeXcode} object, so only the \Obj{TeXcode} object reference is being passed around.
-
-\end{description}
-
-\CGI
-In the CGI version, \code{parse} is called directly from \code{MAIN} since only one document can be parsed and no file inclusion is possible. 
-\CGIend
-
-
-\subsection{\LaTeX{} code parsing by \code{parse}}
-
-The \code{parse} routine takes a \Obj{TeXcode} object and parses this to the end. It is, however, only the entry point for parsing the \LaTeX{} code: other routines do the main parsing with \code{_parse_unit} being the main work horse. In fact, \code{parse} only initiates the parsing, calling \code{_parse_unit} repeatedly until the end of the file.
-
-The \code{_parse_unit} routine is used to parse one unit or block of \LaTeX{} code: a unit/block can be a part of the document enclosed in e.g. \{\ldots\} or \code{\bs{begin}\ldots\bs{end}}, or based on context enclosed by e.g. \code{[\ldots]}, or the document at the top level. It is passed the \Obj{TeXcode} object to parse, a parsing state instructing it how the block should be parsed, and optionally a block-end token which tells \code{_parse_unit} when the block ends. The \code{_parse_unit} routine is then called recursively whenever a unit/block is encountered that requires a separate parsing state or closing token.
-
-The parsing state indicates if the block is part of the main text in which words should be counted, a header, equation contents, should be excluded, etc. and is the only state variable of the parser. In addition to the regular states with which the \LaTeX{} code is parsed, there are transition states. E.g. \code{\$STATE_TO_HEADER} indicates that the block should be counted as a header and the contents should then be parsed using \code{\$STATE_TEXT_HEADER} as specified in \code{\%transition2state}.
-
-The document is tokenized, and \code{_parse_unit} retrieves one token at a time by calling \code{next_token}. Depending on the active parsing state and token, different rules (most with their own subroutines) are applied. These rules add to the \Obj{count} object of the \Obj{TeXcode} object by calling \code{_inc_count} and set the presentation style of the verbose output including which tokens to print. The active token and its style are by default stored in the \Obj{TeXcode} object and printed to the verbose output by \code{next_token} upon retrieving the next token, although this is occasionally overridden by calls to e.g. \code{flush_next}.
-
-When \code{_parse_unit} encounters a new block/unit, it will determine the state with which this unit should be parsed based on the present state and the context that defines the unit.
-
-
-\subsection{Summary statistics}
-
-The counts are stored in the \Obj{TeXcode} object: subroutines performing the actual parsing increment the appropriate counter upon processing the parsed tokens. The \Obj{TeXcode} object contains a main \Obj{count} object from which summary output is generated. The \Obj{count} object can also contain a list of subcounts, themselves \Obj{count} objects, which may also be presented in the summary.
-
-Depending on options set and the number of files parsed, summary output can range from a single number of the total word count, to an extensive summary for each specified file with separate summaries for each included file, as well as a total summary.
-
-
-
-\section{Global constants and variables}
-
-There are a number of global variables defined at the start of the script for storing options and settings as well as global counters.
-
-In addition, there are sets of global constants, as well as other globally defined variables, hashes and arrays. Here, we outline the main groups.
-
-
-\subsection{Global constants}
-
-There are a few sets of global constants. The use of global constants makes the code more readable. The sets of global constants are:
-
-\begin{description}
-
-\item[\code{\$STATE_\wild}:] Parsing states, e.g. \code{\$STATE_TEXT} for parsing \LaTeX{} code as regular text and \code{\$STATE_IGNORE} for regions that should not be counted.
-
-\item[\code{\$CNT_\wild}:] Index pointing to the location in the counter array used for a specific count, e.g. \code{\$CNT_WORDS_TEXT=1} indicating that words in text are counted in position 1 of the array.
-
-\item[\code{\$TOKEN_\wild}:] Token types, e.g. \code{\$TOKEN_WORD} and \code{\$TOKEN_MACRO}. When a token is parsed, the \Obj{TeXcode} object stores the token type as well as the token, which can then be used to determine how the token should be interpreted.
-
-\end{description}
-
-\subsubsection{Counter indices: \code{\$CNT_\wild}}
-
-The \Obj{count} object contains an array with the following counts: number of files, number of words in text, number of words in headers, number of words in captions, number of headers, number of floating objects/tables/figures, number of inline equations, number of displayed equations. Storing these are the main purpose of the \Obj{count} object.
-
-Each count has a fixed position in the array, and the \code{\$CNT_\wild} constants provide the positions of each count: e.g. \code{\$CNT_WORDS_TEXT=1} indicates that the counter for words in the text is stored in position 1 of the array. Originally, these positions were hard-coded and directly related to the parsing states, but by using these constants, and keeping the counter indices distinct from the parsing states, the code becomes both more readable and more flexible in case of future changes.
-
-\subsubsection{Parsing states: \code{\$STATE_\wild}}
-
-The parsing states fall into two categories.
-
-First there are parsing states used during the parsing of a unit/block: e.g. \code{\$STATE_TEXT}, \code{\$STATE_MATH}, \code{\$STATE_IGNORE}, \ldots. In some of the states, words are counted either as text words, header words or caption words; in other states, words are ignored and the state primarily influences how the parsed \LaTeX{} code is styled in the verbose output.
-
-Secondly, there are transitional states: e.g. \code{\$STATE_TO_HEADER} which indicates the start of a header which should first cause the header count to be incremented and then the contained text to be parsed as header text using the parsing state \code{\$STATE_TEXT_HEADER}. The handling of the transitional states is encoded in \code{\%transition2state} and performed by the \code{transition_to_content_state} routine which is called by \code{_parse_unit}.
-
-Macro handling rules specify how many parameters the macro takes and which parsing states are used to parse each parameter; for environments, it additionally specifies a parsing state for the contents of the environment.
-
-Originally, before implementing the \code{\$STATE_\wild} constants, fixed numerical values were hard coded into the Perl code, and these numerical codes were required for adding new rules. For the macro rules specified within the Perl code of \TeXcount{}, the original numerical codes remain in the initial rule specification. However, from version 2.3 of \TeXcount{}, the intention is that users should no longer use these numerical codes to specify new macro handling rules, but instead use a set of keywords: e.g. \code{text}, \code{header}, \code{ignore}, etc. For this purpose, a hash \code{\%key2state} is defined which maps keywords to parsing states. The original numerical codes are included in this map in part for backward compatibility, but also because this key-to-state map is applied to the macro handling rule hashes \code{\%TeXmacro}, \code{\%TeXenvir}, etc. The \code{\%key2state} hash is set up e.g. with
-\codeline{add_keys_to_hash(\bs{\%key2state},\$STATE_TEXT,1,'word','w','wd');}
-which maps the keys \code{1}, \code{'word'}, \code{'w'} and \code{'wd'} all to the value \code{\$STATE_TEXT} (which need not be 1!). However, this specification, which is used to convert keywords to states during initialisation of the macro handling rules and later if adding new rules, ensures that the original numerical codes will be handled as before: \TeXcount{} will be backward compatible with respect to using the numerical codes to add new macro handling rules through \code{\%TC} commands.
-
-Although in theory the parsing state numerical codes could be changed without any effect to the code, there are still a few places where the actual numerical values are used: e.g. the routine \code{state_to_text}.
-
-\subsubsection{Token types: \code{\$TOKEN_\wild}}
-
-When the \LaTeX{} code is tokenized, i.e. the string containing the \LaTeX{} code is converted to tokens like words or macros, not only is the token stored in the \Obj{TeXcode} object, but a token type is stored as well indicating if the object is a word, macro, space, symbol, bracket, etc. To make the Perl code more readable, these token types, although just integer values, are represented by constants \code{\$TOKEN_\wild}.
-
-When \code{_parse_unit} parses the \LaTeX{} code, it frequently uses the token type stored in the \Obj{TeXcode} object rather than the token itself to determine how to interpret the parsed tokens.
-
-
-\subsection{Option alternatives}
-
-Some options result in choosing between a number of alternatives for parsing, counting or presentation. These alternatives tend to be defined in arrays or hashes. When an alternative is selected, the corresponding value(s) are copied to a variable, array or hash which may then later be applied or further processed.
-
-\begin{description}
-
-\item[\code{\%BreakPointOptions}:] For keywords like \code{section} or \code{chapter}, this defines which macros indicate a new break point (i.e. initiates a new subcount).
-
-\item[\code{\%STYLES}, \code{\%STYLE}:] The \code{\%STYLES} hash contains different sets of style definitions, used to define the style with which tokens are printed, and are used to set the \code{\%STYLE} hash by \code{Apply_Options} after the options have been processed. Each value of the \code{\%STYLES} is a hash mapping style name to ANSI colour styles. For a given style, only style names defined in the style are printed in the verbose output. If ANSI colour coded output is used, these are the colour codes; otherwise, the ANSI colour styles are not themselves used, but the style name must still be included in the hash to enable the token to be printed.
-
-\item[\code{\%NamedLetterPattern}, \code{\$LetterPattern}:] Named regex patterns are defined in \code{\%NamedLetterPattern} where the selected pattern is stored in \code{\$LetterPattern}. This regex pattern defines what is recognized as letters when parsing \LaTeX{} code.
-
-\item[\code{\%NamedWordPattern}, \code{@WordPatterns}, \code{\$WordPattern}:] Named word patterns are defined in \code{\%NamedWordPattern}. The selected patterns are stored in the array \code{@WordPatterns}. Letters are indicated by a special character, and when the options are applied replaced by \code{\$LetterPattern} and merged into a single regex stored in \code{\$WordPattern}.
-
-\item[\code{\%NamedMacroOptionPattern}, \code{\$MacroOptionPattern}:] Named regex patterns are stored in \code{\%NamedMacroOptionPattern}, and the selected pattern copied to \code{\$MacroOptionPattern}. This pattern is used to recognize macro options which should be excluded from word counts.
-
-\item[\code{\%NamedEncodingGuessOrder}:] For each named language, this gives an array of encodings to try if none is given.
-
-\end{description}
-
-
-
-\section{Details of the \TeXcount{} objects}\label{sec:objects}
-
-These objects are simply hashes that are created with a given set of keys. Some keys may, however, be optional.
-
-
-\subsection{The \Obj{Main} object}
-
-The \Obj{Main} object is used instead of the \Obj{TeXcode} object to capture errors and warnings. It is created by the \code{getMain} routine. The values (keys) it contains are:
-
-\begin{description}
-
-\item[\code{errorcount}:] Numerical value, initialised to 0, used to count the number of errors reported.
-
-\item[\code{errorbuffer}:] Array, initialised to an empty array, used to buffer error messages reported before output is available: e.g. before the header or HTML header has been printed.
-
-\item[\code{warnings}:] Hash, initially empty, used to store warnings.
-
-\end{description}
-
-When errors are reported through calls to \code{error}, they will be stored in the \code{errorbuffer} if this exists, otherwise printed immediately. This is used to store errors reported before e.g. the HTML header has been written. After the appropriate headers have been written and the output channel is ready for writing, a call to \code{flush_errorbuffer} is made which prints all the errors in the errorbuffer and then deletes it so further errors will be printed immediately rather than buffered.
-
-
-\subsection{The \Obj{TeXcode} object}
-
-The \Obj{TeXcode} object is used to encapsulate the \LaTeX{} code and corresponding counts. It is created by the \code{TeXcode} routine. The values it contains are:
-
-\begin{description}
-
-\item[\code{filename}, \code{filepath}:] The name and path of the parsed \LaTeX{} file.
-
-\item[\code{PATH}:] An array containing the paths to search for included documents. At creation, this is empty, but calls to \code{_add_file} will set it; the top level files, initiated from \code{parse_file}, will have this set to \code{\$workdir}.
-
-\item[\code{texcode}:] Initialised with the \LaTeX{} document as a single string. If included files are to be inserted into the document, they will be inserted into the \code{texcode} string.
-
-\item[\code{texlength}:] Counts the total length (in characters) of \LaTeX{} code. Initialised with the length of the \LaTeX{} document. If included documents are inserted, their length is added to \code{texlength}.
-
-\item[\code{line}:] Initialised to an empty string. During parsing, segment by segment (one paragraph at a time) is moved from \code{texcode} to \code{line}. Tokens are then read and subsequently removed from \code{line}.
-
-\item[\code{next}:] Initialised to \code{undef}, this stores the next token to be processed. Upon tokenization, the token is identified and removed from the start of \code{line} and moved to \code{next}.
-
-\item[\code{type}:] Initialised to \code{undef}, this contains the token type (\code{\$TOKEN_\wild}) of the \code{next} token.
-
-\item[\code{style}:] Initialised to \code{undef}, this is used to set the style with which the \code{next} token should be presented in the verbose output.
-
-\item[\code{printstate}:] Initialised to \code{undef}, this is used to output the active parsing state for use with verbose output (if \code{\$showstates} is set).
-
-\item[\code{eof}:] Initialised to 0, this is set to 1 once the end of the document is reached.
-
-\item[\code{countsum}:] This contains the main \Obj{count} object.
-
-\item[\code{subcount}:] This contains the present subcount which is also a \Obj{count} object. These subcounts are used to count e.g. sections and chapters of the document.
-
-\item[\code{errorcount}:] Initialised to 0, used to count the number of errors reported during the parsing.
-
-\item[\code{errorbuffer}:] Undefined at initiation, indicating that errors should be printed instantly rather than stored for later printing. Can be defined as an array which is then used to store error messages so they can be printed later. 
-
-\item[\code{warnings}:] Hash used to store warnings.
-
-\end{description}
-
-When the \Obj{TeXcode} object is initialised, the \LaTeX{} document is placed as a single big string in \code{texcode}. During parsing, \code{next_token} is called on to return the next token, which in turn delegates to \code{_get_next_token}. Instead of operating on the whole document, which was done in older versions of \TeXcount{} and was quite slow on large documents, \code{more_texcode} is called on to move segments (i.e. paragraphs) of \LaTeX{} code from \code{texcode} to \code{line}, and then it grabs one token at a time from \code{line}. This is when \code{next} and \code{type} are set.
-
-When the tokens are interpreted and counted, \code{inc_count} is called which increments the appropriate counter in \code{subcount}. If a new subcount is initiated, a call to \code{next_subcount} adds \code{subcount} to \code{sumcount}, including appending the \code{subcount} object to the list of subcounts stored with \code{sumcount}, and then replaces \code{subcount} with a new \Obj{count} object.
-
-
-\subsection{The \Obj{count} object}
-
-The \Obj{count} object is used to store the word and text element counters. It is created by \code{new_count}. The values it contains are:
-
-\begin{description}
-
-\item[\code{title}:] A string set upon creating to contain a descriptive title of the count.
-
-\item[\code{counts}:] An array, initialized with 0s, which is used to store the counts. The size of the array is determined by \code{\$SIZE_CNT} and should reflect the number of \code{\$CNT_\wild} indices defined.
-
-\item[\code{subcounts}:] This is an array, initialised to an empty array, used to store the subcounts.
-
-\end{description}
-
-In addition to the default fields, when used as the \code{sumcount} field of a \Obj{TeXcode} object, a few additional fields are added:
-
-\begin{description}
-
-\item[\code{TeXcode}:] This is a reference pointing back to the \Obj{TeXcode} object in which it is contained.
-
-\end{description}
-
-
-
-\section{\LaTeX{} code parsing and interpreting}
-
-The entry point for parsing a \LaTeX{} document is the \code{parse} routine. This simply calls \code{_parse_unit} repeatedly using parsing state \code{\$STATE_TEXT} until the end of the document is reached. Thus, \code{_parse_unit} is the main routine for performing the actual parsing.
-
-The \code{_parse_unit} routine is called with a \Obj{TeXcode} object, a parsing state, and optionally a unit-ending token as arguments. It then calls \code{next_token} on the \Obj{TeXcode} object until the unit-ending token is reached: if the file ends before this is found, an error is reported. If no unit-ending token is provided, only one unit will be parsed. If the unit-ending token is set to \code{\$_PARAM_}, indicating that the unit to be parsed is a macro parameter, the \code{\$simple_token} flag is set and passed to \code{next_token} to avoid combining letters into words, and only one token is parsed before returning.
-
-For each token, depending on the token, token type, and active parsing state, \code{_parse_unit} decides how the token should be interpreted. In some cases, the interpretation is done within \code{_parse_unit}, but in many cases the interpretation is delegated to subroutines like \code{_parse_macro}, \code{_parse_math}, etc. If new groups (\code{\{\ldots\}} or \code{\bs{begin}\ldots\bs{end}}) are encountered, this causes \code{_parse_unit} to be called recursively with a unit-ending token passed to \code{_parse_unit} to identify the group end.
-  
-Note that by default, even blocks that are to be ignored are parsed and require balanced units. Different exclude states exist to deal with cases in which the unit should not be completely parsed.
-
-Upon interpreting the parsed tokens, \code{_parse_unit} or the subroutines to which it delegates the interpretation control the counter incrementation as well as how the tokens are presented in the verbose output. The counter incrementation is done through calls to \code{inc_count} passing as arguments the \Obj{TeXcode} object, the appropriate count reference (\code{\$CNT_\wild}), and optionally a number if the counter should be increased by a number different from 1. Specifying how the token should be presented in the verbose output is done by deciding on the style, usually set using \code{set_style}: the styles are represented by strings that give the style name, which are the same as used as keys in \code{\%STYLE} and as styles in the HTML output.
-
-If a style for presenting a token is selected which is not in the \code{\%STYLE} hash, the token is not printed. Thus, the \code{\%STYLE} hash also filters which tokens are printed to the verbose output.
-
-
-\subsection{Tokenization and token handling}
-
-The routine for retrieving the next token is \code{next_token}. This first makes sure that the previous token gets printed to the verbose output with the style specified by \code{set_style}. It then calls \code{_get_next_token} to retrieve the next token: this will process comments and line breaks itself until a token is retrieved that it returns.
-
-The \code{_get_next_token} routine checks the \code{line} field of the \Obj{TeXcode} object to determine which is the next token in \code{line}. If the \code{line} field is empty, it calls \code{more_texcode} to move the next segment of \LaTeX{} code from the \code{texcode} field of the \Obj{TeXcode} object to \code{line}. When it has decided on the appropriate kind of token, removing it from the start of the \code{line} field in the process, it sets the \code{next} and \code{type} fields of the \Obj{TeXcode} object through calls to \code{__set_token} or \code{__get_token} (for single character tokens).
-
-If the optional \code{\$simple_token} flag is set, only simple tokens will be returned: i.e. letters will not be combined into words. This is used for parsing macro parameters.
-
-
-\subsection{Processing parameters and options}
-
-In \code{_parse_unit}, based on the parsing state and parsed token, it is decided how to interpret and process the token. In some cases, this processing is restricted to the parsed token itself: counting or ignoring it as well as deciding on the style with which it should be presented in the verbose output.
-
-In some cases, the token influences the parsing of subsequent text: e.g. macros can take parameters and options. Special subroutines exist to handle parsing of macro parameters, gobble up spaces or macro parameters, or handle ignored regions.
-
-
-\subsection{Verbose output}
-
-By default, all parsed code is processed for printing to the verbose output. If it actually gets printed or not depends on whether the set style is included in the \code{\%STYLE} hash or not. 
-
-Upon parsing a token, it is stored in the \code{next} field of the \Obj{TeXcode} object. If \code{set_style} is called during processing, this will set the \code{style} field of the \Obj{TeXcode} object, but will not itself print the token. The \code{flush_next} routine is used to print the \code{next} token using the style set in the \code{style} field, or provided in the call; this in turn calls \code{print_style} which is responsible for the printing. There is an automatic call to \code{flush_next} when the next token is retrieved, ensuring that all tokens are sent off for printing. When \code{flush_next} is called, the \code{style} field is set to \code{\$STYLE_BLOCK='-'} which blocks further printing (or change in style) of the token; the \code{style} field is then set to \code{undef} by \code{next_token} upon reading the next token.
-
-The tokens are passed to \code{print_style}, either directly from the parsing or via \code{next_token}, which looks up the style in the \code{\%STYLE} hash. Only tokens whose style is defined in the \code{\%STYLE} hash get printed. If colour coded output to text is set, the values \code{\%STYLE} are used with the \code{ansiprint} function to print the token using ANSI colour codes. If output to HTML is chosen, the token will be printed enclosed in a \code{<span>} tag using the style as class; the HTML style definitions are then used to determine how these elements will be displayed.
-
-Special style values are \code{\$STYLE_EMPTY=' '} which is used for spaces and must be defined in the \code{\%STYLE} for spaces to be printed, and the \code{\$STYLE_BLOCK='-'} style value which is not actually a style but a value used to mark that the token has already been printed and block further printing of it.
-
-In addition to the \code{\%STYLE} hash which specifies which tokens get printed, there is a global variable \code{\$printlevel} the value of which is taken from the \code{\%STYLE} which is used to control if verbose output is on ($1$ or $2$) or off ($0$ or $-1$). The $-1$ value indicates the quiet mode in which errors should not be printed; the value $1$, as opposed to $2$, indicates that multiple ignored lines should be collapsed to make the verbose output more compact, although this is only partially done.
-
-The routines for handling tokens, styles and verbose printing remain from the earliest version of \TeXcount{} and have not undergone much improvement or cleaning up, and remain somewhat unstructured. Hence, there may be stray calls to e.g. \code{set_style} that no longer have any effect.
-
-
-
-\section{Regex patterns: letters, words, macro options}
-
-One of the most important regex definitions in \TeXcount{} is that used to recognize words. This is done in two steps: first a regex for letters is produced, and then this is combined with patterns for words to generate one big pattern.
-
-Another regex defined is the one used to recognize macro options, i.e. \code{[\ldots]}, that appear together with macros and which should be ignored.
-
-One reason behind the desire to generate one big pattern rather than loop through alternative patterns is to enable Perl to compile each pattern just once. The pattern compilation typically takes longer than the pattern matching, so this can make a big difference.
-
-
-\subsection{The word regex}
-
-First note that \TeXcount{} distinguishes between alphabetic words, i.e. words composed of letters, and logograms (e.g. Chinese characters) which are counted per character. When words (or letters) are counted, these are made from characters defined as alphabetic; characters defined as logographic are counted separately character by character.
-
-The regex pattern recognizing a letter is placed in \code{\$LetterPattern}. This is usually taken from one of the optional patterns in \code{\%NamedLetterPattern}, but can be modified elsewhere or replaced by \code{undef} to signify that no words or letters should be counted.
-
-A number of regex patterns which should be recognized as words are placed in the array \code{\@WordPatterns}. This is usually set by using one of the named lists of word patterns defined in \code{\%NamedWordPattern}, but can be redefined or modified by options. In the word patterns, the character \code{\@} is used to represent a letter, and this is later replaced by \code{\$LetterPattern} when the options are applied.
-
-After parsing the command line arguments, the options and settings are applied. At this point, through \code{apply_language_options}, \code{\$LetterPattern} is applied to \code{\@WordPatterns}, which are then combined into a single regex: \code{\$WordPattern}. At this point, patterns for recognizing logograms are also added.
-
-
-\subsection{The macro option regex}
-
-After macros and macro parameters, macro options on the form \code{[\ldots]} will be ignored. There is a single regex used to recognize and remove these macro options.
-
-For most uses, macro options tend to be short codes which are easily recognized. However, there are also cases where the macro options can be more complex. On the other hand, there are also cases where brackets are used without being macro options, and it is vital that these cases should not be mistaken for macro options: in particular if they contain text that should be counted.
-
-In order to capture most macro options as options without running a risk of ignoring actual text enclosed in brackets, restrictions are placed on what can go inside macro options. The default rule is moderately strict, but can be relaxed to allow more extensive and general macro options.
-
-The different macro option regex patterns are named in \code{\%NamedMacroOptionPattern} and copied to \code{\$MacroOptionPattern} when initialised or changed by options.
-
-
-\subsection{Unicode character classes}
-
-The user can specify which character classes should be considered alphabetic (i.e. letters) and which should be considered logographic (i.e. counted as individual characters). Typical alphabetic characters are the Latin letters. Typical logograms are the Chinese characters. If any of the language options are used, these character classes will automatically be set.
-
-Specifications of alphabets and logograms are done by options \code{-alpha=} and \code{-logo=} using Unicode character classes. Unicode classes include Latin, Digit, Ideographic, Han, etc. Note that all Unicode character classes start with capital letters. 
-
-
-\subsection{Custom made character classes}
-
-Some of the Unicode character classes are not defined quite as desired by \TeXcount{}. In particular, the \code{Alphabetic} character class includes \code{Ideographic}, which would cause e.g. Chinese characters to be allowed as parts of words together with Latin characters rather than force them to be counted as individual characters. To resolve this problem, new character classes are defined in \TeXcount{} that fit our need.
-
-New character classes can be defined within \TeXcount{} through subroutines named \code{Is_\textit{name}}. Most notable is the \code{Is_alphabetic} character class from which the logographic characters have been excluded. This is now used as the default alphabetic character class.
-
-Presently defined character classes are named \code{digit}, \code{alphabetic}, \code{alphanumeric}, \code{punctuation}, \code{cjk}, \code{cjkpunctuation}. Note that these are all lower case, and have the prefix \code{Is_} added when referred to in the code.
-
-When adding character classes to the set of alphabetic or logographic characters using \code{-alpha=} or \code{-logo=}, the names without the prefix \code{Is_} may be used: for character classes starting with a lower case letter, the prefix is added automatically.
-
-Note that the subroutines specifying the character classes must be defined prior in the code to any use: this is unlike other subroutines which may be defined anywhere in the code. Also, to be permitted as character classes by Perl, the subroutines must start with \code{Is_} (or \code{In_} although that is not used by \TeXcount{}), although different versions of Perl need not enforce this.
-
-
-
-\section{Macro handling rules}
-
-While some rules for handling macros are hard-coded into \TeXcount{}, most of the rules are stored in a number of hashes which \TeXcount{} looks up whenever a macro is encountered. The general rule is that the keys are either macros (e.g. \code{'\bs{section}'}) or environment names (e.g. \code{'quote'}).
-
-\begin{description}
-
-\item[\code{\%TeXmacro}:] The keys are macros, or \code{'begin\textit{name}'} where name is an environment name, and the values specify how many parameters the macro (or environment) takes and how these should be processed. See the section on parameter handling rules further down.
-
-\item[\code{\%TeXenvir}:] The keys are environment names, and values are the parsing state with which the contents of the environment should be parsed.
-
-\item[\code{\%TeXpreamble}:] These are macro handling rules to be applied in the preamble, i.e. after \code{\bs{documentclass}} but before \code{\bs{begin}\{document\}}. The rules are specified as for \code{\%TeXmacro}.
-
-\item[\code{\%TeXfloatinc}:] These are macro handling rules to be applied within floating bodies, i.e. tables and figures.
-
-\item[\code{\%TeXmacroword}:] The keys are macros, and the values are numbers representing how many words the macro generates. This is used for macros like \code{\bs{LaTeX}} which generates text.
-
-\item[\code{\%TeXpackageinc}:] The keys are macros used to include packages. Although included in \code{\%TeXmacro}, the processing of package inclusion is actually performed by \code{_parse_include_package} independent of the hash value. The value should therefore be \code{1} or \code{[\$STATE_IGNORE]} since this is how it will be processed by \code{_parse_include_package}.
-
-\item[\code{\%TeXfileinclude}:] The keys are macros used to include \LaTeX{} files into the document, the value a keyword or list of keywords telling how file names and paths should be interpreted. Processing of these macros is done by \code{_parse_include_file}.
-
-\end{description}
-
-Note that the definition of \code{\%TeXmacro} starts by including \code{\%TeXpreamble}, \code{\%TeXfloatinc} and \code{\%TeXpackageinc}. After that, the values of \code{\%TeXpackageinc} are never used. For \code{\%TeXpreamble} and \code{\%TeXfloatinc}, however, it is in principle possible to have rules within the preamble and floats, respectively, that are different from those defined in \code{\%TeXmacro} and applied elsewhere in the document.
-
-
-\subsection{Parameter handling rules}
-
-A macro can be specified to take a given number of parameters: this will typically be \code{\{\ldots\}} blocks following the macro. For each of these parameters, a separate parsing state can be specified. This is represented by an array with one element for each parameter, the elements being the parsing state (\code{\$STATE_\wild}) with which that parameter should be parsed.
-
-In addition to the \code{\$STATE_\wild} rules are some modifier/option states, \code{\$_STATE_\wild}. The \code{\$STATE_OPTION} state indicates that the next rule in the list is an optional parameter enclosed in \code{[]}. By default \code{[]} options are ignored, which can be switched off by \code{\$STATE_NOOPTION} or on by \code{\$STATE_AUTOOPTION}.
-
-An alternative specification of a parameter handling rule is to give the number of parameters to ignore. \TeXcount{} will check if the specified rule is an array (as described above) or a number and interpret the rule accordingly.
-
-The hashes \code{\%TeXmacro}, \code{\%TeXpreamble} and \code{\%TeXfloatinc} all take values that are this kind of parameter handling rules, as does \code{\%TeXpackageinc} since it is included in \code{\%TeXmacro}.
-
-Throughout the script, parsing states are referred to using the \code{\$STATE_\wild} constants. In previous versions, however, these codes were hard-coded into the script and used both to set up the hashes and to specify new rules through \%TC instructions. For backward compatibility, the old numerical state codes remain in the conversions from keywords to \code{\$STATE_\wild} constants as stored in \code{\%key2state} and applied through calls to \code{convert_hash} accompanied by \code{keyarray_to_state} or \code{key_to_state}.
-
-
-\subsection{File inclusion and the \code{\%TeXfileinclude} hash}
-
-The main \LaTeX{} commands for file inclusion are \code{\bs{input}} and \code{\bs{include}}, while \code{\bs{bibliography}} includes the \code{.bbl} bibliography file. However, additional packages exist that can also modify the file search path, of which \TeXcount{} has support for the \code{import} package.
-
-File inclusion macro rules are stored in the \code{\%TeXfileinclude} hash. The values are strings which contain one or more keywords (separated by space or comma):
-%
-\begin{description}
-
-\item[\code{input}:] This is a special keyword to use with \code{\bs{input}}. The handling of the parameter values is as \code{file}, but the parameter itself is not required to be enclosed in \code{\{\}}.
-
-\item[\code{file}:] This parameter simply gives the name of or path to a file. If the file is not found, \TeXcount{} will append \code{.tex} and try again.
-
-\item[\code{texfile}:] This parameter gives the name of or path to a file, but \code{.tex} will be appended, and is the rule used by \code{\bs{include}}.
-
-\item[\code{dir}:] This parameter provides the path of a directory relative to the \code{\$workdir}, and adds this to the search path before including any files. This is used with the \code{\bs{import}} macro of the \code{import} package.
-
-\item[\code{subdir}:] This parameter provides the path of a directory relative to the current directory, and adds this to the search path before including any files. This is used with the \code{\bs{subimport}} macro of the \code{import} package.
-
-\item[\code{<bbl>}:] This is a special keyword to use with \code{\bs{bibliography}} to specify inclusion of the bibliography file.
-
-\end{description}
-
-The parsing of the macros and parameters is done by \code{_parse_include_file}. For each keyword it parses a parameter, unless the parameter is on the form \code{<\textit{keyword}>}. The parsing of the \code{input} parameter is handled differently from the rest since it need not be enclosed by \code{\{\}}. It then delegates the processing of macro inclusion rules to \code{include_file}.
-
-In \code{include_file}, the file is located (based on search path) and either appended to the \code{@filelist} array of files to be included, or merged immediately into the document by calls to \code{read_binary} and \code{prepend_code}.
-
-The \code{@filelist} array contains elements which are themselves arrays on the form \code{[file,path,\ldots]} where the first element is the path to the file to be included, and the remaining elements are the search paths used to set the \code{PATH} values of the \Obj{TeXcode} object. For the top level files, i.e. the ones specified on the command line, the search path will contain only \code{\$workdir}: the directory from which \TeXcount{} is executed unless \code{-dir} is used to specify otherwise. If more directories are added to the path, \code{\$workdir} will remain the last directory of the search path, while the first directory of the search path will be considered the current directory.
-
-File inclusion macros can also take parameters that should be parsed using regular macro parsing rules. The \code{TeXmacro} hash will be checked for \code{@pre\bs{macroname}} and \code{@post\bs{macroname}} entries which will be applied before and after the file handling rules.
-
-
-\subsection{Package and document class specific rules}
-
-Whenever \TeXcount{} encounters a package inclusion, it will check for package specific rules. These are defined in hashes named \code{\%PackageTeXmacro} etc. which map the package name to the hash map of rules to be added to \code{\%TeXmacro} etc. There is an additional \code{\%PackageSubpackage} which for each package name in the set of keys maps to a list of packages whose rules should automatically be included.
-
-Similarly, rules specific to particular document classes may be implemented by using the key \code{class\%\parm{name}} instead of the package name, and these will then be added to the set of parsing rules if \code{\bs{documentclass}\{\parm{name}\}} is encountered.
-
-Note that rules for including the bibliography are also stored in these hashes under the key \code{\%incbib}.
-
-
-
-\section{Presentation of summary statistics}
-
-The counts (words, headers, etc.) from a \LaTeX{} document are stored as \Obj{count} objects. The main routine for printing the summary statistics from a \Obj{count} object is \code{print_count}: the routine \code{conditional_print_total} which is called from \code{MAIN} delegates printing to \code{print_count} except if the brief output format is selected. The \code{print_count} routine then delegates the printing to one of a number of subroutines depending on the settings.
-
-Word frequencies are stored globally in \code{\%WordFreq}. This gets incremented each time \code{_process_word} is called. Summaries of word frequencies are produced and printed by \code{print_word_freq} which tries to combine words that differ only by capitalization, and also produces subcounts per character class.
-
-A global count of the number of errors reported is stored in \code{\$errorcount}, while warnings are stored globally in the \code{\%warnings} hash mapping when added through the \code{warning} routine with the warning as key and the number of occurrences as value to ensure each warning is only listed once no matter how many times it is reported. Both warnings and errors are also stored in their respective \Obj{Main} or \Obj{TeXcode} objects when reported through calls to \code{error} or \code{warning}.
-
-In \code{MAIN}, after processing of the \LaTeX{} documents, \code{Report_Errors} is called to give a total report on errors and warnings. The exact output depends on the settings. 
-
-NB: Processing of errors and warnings requires some improvement. Now, parts of the code handle errors per file, others do so globally.
-
-
-
-\section{Encodings}
-
-The preferred encoding is Unicode UTF-8. From version 2.3 of \TeXcount{}, this is used internally to represent the \LaTeX{} code, and Unicode is relied upon to handle different character sets and classes.
-
-When files are read into \TeXcount{}, they may have to be decoded from whatever encoding they are saved in into UTF-8. The file encoding may be specified explicitly using the \code{-enc=} option, otherwise \TeXcount{} will try to guess the appropriate encoding.
-
-The output from \TeXcount{} is by default UTF-8. However, if a file encoding is specified using \code{-enc=} and output is text, not HTML, this encoding will also be applied to the output. This may be useful when using \TeXcount{} in a pipe, otherwise the documents will be converted to UTF-8.
-
-
-
-\section{Help routines and text data}
-
-A hash, \code{\%GLOBALDATA}, and hash reference \code{\$STRINGDATA} are defined for storing strings used for various outputs. The \code{\%GLOBALDATA} is set up containing string constants for version number, maintainer name, etc., while \code{\$STRINGDATA} is initially undefined.
-
-The \code{\$STRINGDATA} hash is accessed through calls to \code{StringData} which initialises the hash if undefined. Initialisation, which is done by \code{STRINGDATA}, reads through the \code{__DATA__} section at the end of the script, identifies headers which are used as keys in the hash which maps to an array containing the subsequent text lines. References in the read text on the form \code{\$\{keyword\}} are replaced by the corresponding string in \code{\%GLOBALDATA}: this allows e.g. version information to be inserted into the text.
-
-Headers in the text data consist of three or more colons followed by space(s) and a keyword. Lines containing three or more colons but no keyword have no effect.
-
-Lines starting with \code{\@} are used to format output printed by \code{wprintlines}. The two characters \code{'-'} and \code{':'} can then be used to indicate indentation tabulators, and subsequent lines will be indented and wrapped: this is used for printing help on command line options. The \code{wprintlines} also wraps text: the page width is set by \code{\$Text::Wrap::columns}.
-
-
-\end{document}

Added: trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.pdf
===================================================================
(Binary files differ)

Index: trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.pdf
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.pdf	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.pdf	2017-09-19 22:01:25 UTC (rev 45338)

Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,50 @@
+\documentclass{article}
+\usepackage[T1]{fontenc}
+\usepackage{a4wide}
+
+\include{macros}
+
+
+\title{%
+\TeXcount{} Quick Reference Guide\\
+Version \version\copyrightfootnote
+}
+
+\begin{document}
+
+\maketitle
+
+\section{Command line options}
+
+Syntax for running \TeXcount{}:
+\codeline{texcount \textit{[options] [files]}}
+where \code{texcount} refers to the TeXcount Perl-script, and the options may be amongst the following:
+
+\input{sub_options}
+
+
+\section{\TeXcount{} instructions embedded in \LaTeX{} documents}
+
+Instructions to \TeXcount{} can be given from within the
+\LaTeX{} document using \LaTeX{} comments on the format
+\codeline{\%TC:\textit{instruction [name] parameters}}
+where the name is used for instructions providing macro handling rules to give the name of the macro or group for which the rule applies.
+%
+\input{sub_tc_other}
+
+\subsection{Adding macro handling rules}
+
+Instructions for adding macro handling rules all take the format
+\codeline{\%TC:\textit{instruction name parameters}}
+where the name indicates the macro (with backslash) or group name for which the rule applies:
+%
+\input{sub_addrules}
+
+The available parser rules for environment contents and macro parameters are \code{word}/\code{text}, \code{headerword}, \code{otherword}, \code{header}, \code{float}, \code{inlinemath}, \code{displaymath}, \code{ignore}, \code{xx} (strong exclude), \code{xxx} (stronger exclude), \code{xall} (exclude all) or any of their aliases.
+
+The available counters are \code{word}/\code{text}, \code{headerword}, \code{otherword}, \code{header}, \code{float}, \code{inlinemath}, \code{displaymath} or any of their aliases.
+
+Available file specifications contain one or more of \code{input} (for \code{\bs{input}}), \code{file} (file path), \code{texfile} (use with \code{\bs{include}}), \code{<bbl>} (to include the bibliography file), \code{dir} and \code{subdir}. The \code{dir} and \code{subdir} are used to modify the search path within the included document (used with the \code{import} package).
+ 
+
+\end{document}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/QuickReference.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.pdf
===================================================================
(Binary files differ)

Index: trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.pdf
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.pdf	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.pdf	2017-09-19 22:01:25 UTC (rev 45338)

Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,726 @@
+\documentclass{article}
+\usepackage[T1]{fontenc}
+\usepackage{a4wide}
+\usepackage{listings}
+\usepackage{url}
+
+%% TeXcount parsing rules
+%TC:envir lstlisting [] xall
+%TC:macro url [ignore]
+
+%TC:subst \begin{abstract} \begin{abstract} ABS
+
+\include{macros}
+
+%\parindent=0pt\parskip=8pt
+\lstset{basicstyle=\ttfamily\scriptsize,xleftmargin=2cm,xrightmargin=2cm}
+\lstset{basicstyle=\ttfamily\scriptsize,xleftmargin=2cm,xrightmargin=2cm}
+
+\title{%
+\LARGE
+\TeXcount\\
+\Large
+Perl script for counting words in \LaTeX{} documents\\
+Version \version\copyrightfootnote
+}
+\author{Einar Andreas R{\o}dland}
+
+\begin{document}
+
+\maketitle
+
+{\abstract%
+\TeXcount{} is a Perl script for counting words in \LaTeX{} documents. It recognises most of the common macros, and has rules for which parameters to count and not to count; the main text is counted separately from the words in headers and in captions of figures and tables. Finally, it produces a colour coded version of the parsed document, either as a text document or as HTML to be viewed in a browser, indicating which parts of the document have been included in the count.
+%
+}
+
+{\scriptsize\tableofcontents}
+
+\pagebreak
+
+
+% ---------------------------------------------------------------------------
+
+\section{What \TeXcount{} does}
+
+\TeXcount{} is a Perl script made for counting the words in a \LaTeX{} document. Since \LaTeX{} documents are formatted using lots of macro instructions and often contain both mathematical formulae and floating tables and figures, this is no trivial task.
+
+Simple solutions to counting the words consist of detexing the documents, which often merely consists of ignoring the \TeX{} and \LaTeX{} instructions. This is a bad solution since it will usually result in over-estimating the number of words as mathematical formulae, citations, labels and references are counted.
+
+A perfect solution, if such exists, needs to take into account how \LaTeX{} interprets each macro instruction. The simplest approach to taking this into account consists of making the count based on the typeset document, but this too tends to over-estimate the word count as mathematical formulae, table contents and even page numbers may get counted.
+
+A simple but robust approach, which is the one I have taken with \TeXcount{}, is to parse the \LaTeX{} document using simple rules for how to interpret the different \TeX{} and \LaTeX{} instructions. Rules for most of the common macro instructions are included in \TeXcount{}, and it is possible to specify new rules in the \TeX{} document.
+
+The primary focus of \TeXcount{} is to:
+\begin{itemize}
+
+\item provide an accurate count of the number of words in \LaTeX{} documents;
+
+\item exclude or count separately document elements which are not part of the main text such as figure captions;
+
+\item enable the user to, with relative ease, check how \TeXcount{} has parsed the document and which elements have been counted and which have not.
+
+\end{itemize}
+The last point on this list is one of the most important. Having an accurate word count is of little value unless you know that it is accurate; conversely, trusting an inaccurate word count can be potentially harmful, e.g. if you are submitting a paper or a report which has a strict word limit.
+
+\TeXcount{} handles complete \LaTeX{} documents, i.e. that start with \code{\bs{documentclass}} and has the text between \code{\bs{begin}\{document\}} and \code{\bs{end}\{document\}}, as well as partial documents made to be included in another \LaTeX{} document. However, in either case, it requires that all groups are closed: \code{\{\ldots\}} and \code{\bs{begin}\ldots\bs{end}}.
+
+Automatic parsing of included documents is possible, but is by default turned off. There are two options for turning this on: \code{-inc} and \code{-merge}. Turning it on using \code{-merge} will merge the included files into the main document. By using \code{-inc}, however, the included files are parsed separately rather than include the text into the appropriate location: this will perform a separate word count of the included document which is then later included in the total sum.
+
+Since \TeXcount{} relies on relatively simple rules for handling the different macros and only performs limited checks on the validity of the \LaTeX{} document, it is your responsibility to make sure the document actually typesets in \LaTeX{} before running it through \TeXcount{}. Also, \TeXcount{} relies on handling macros taking parameters enclosed with \{ and \}, and on ignoring options enclosed by [ and ]: macros with significantly different syntax such as \code{\bs{vskip}} cannot be handled. There are also limitations on what may be contained in macro options enclosed in [], although this restriction may be relaxed by specifying the command line option \code{-relaxed}.
+
+
+\subsection{What \TeXcount{} counts}
+
+Basically, \TeXcount{} has seven different counts plus an additional file count for use with total counts over a set of files. These and their indices (numbers used to identify them) are:
+\begin{description}
+\item[0. Number of files:] When multiple files are included, this is counted.
+\item[1. Text words:] Words that occur in the main text.
+\item[2. Header words:] Words that occur in headers, e.g. \code{\bs{title}} and \code{\bs{section}}.
+\item[3. Caption words:] Words that occur in figure and table captions.
+\item[4. Header count:] This counts the number of headers, i.e. each \code{\bs{section}} counts as 1.
+\item[5. Figure/float count:] This counts the number of floats and figures, e.g. \code{table} and \code{figure} environments.
+\item[6. Inline formulae:] This counts the number of inline formulae, i.e. \code{\$\ldots\$}.
+\item[7. Displayed formulae:] This counts the number of displayed formulae, e.g. \code{\bs{[}\ldots\bs{]}} or \code{equation} environments.
+\end{description}
+These are stored in an array and sometimes referenced by their index: e.g. in the option \code{-sum=} which takes parameter values corresponding to counts 1 to 7. In other contexts, however, like in the \code{-template=} option or when incrementing specific counters through the \code{\%TC:macrocount} instruction, the counters may be referred to by keywords rather than the indices 0 to 7.
+
+There is also support for adding additional counters using the \TeXcount{} instruction \code{\%TC:newcounter}. These will then be added to the end of the list of counters and should preferably be referred to by name, not index.
+
+\subsubsection{What \TeXcount{} counts as a word}
+
+The primary role is to count the words. It is not entirely clear what should be considered words, so I have had to make some decisions. A sequence of letters is certainly a word. \TeXcount{} also counts acronyms like \textit{e.g.}, dashed words like \textit{over-all}, and \textit{it's} as one word. It also counts numbers as words unless they are placed in a math group. If \TeXcount{} breaks words that contain special characters, you may try the option \code{-relaxed} which extends the range of characters allowed as part of words.
+
+Alternatively, \TeXcount{} may be asked to count the number of letters/characters (not including spaces). It may also be set to count Chinese or Japanese characters.
+
+Mathematical formulae are not counted as words: it would be difficult to define a sensible rule for this. Instead, \TeXcount{} counts the number of inline formulae and displayed formulae separately. You may then decide on how to combine these counts with the word counts, e.g. using the \code{-sum} option.
+
+Text in headers (\code{\bs{title}}, \code{\bs{section}}, etc.) are counted separately: \TeXcount{} counts the number of headers as well as the number of words in headers. It may also provide subcounts for each of these by specifying the \code{-sub} option.
+
+Floating environments (or potentially floating environments) such as tables and figures are not counted as text, even if the cells of a table may contain text. However, if they have captions, these will be counted separately much like headers were. Footnotes are included in this count. By default, environments do not modify the parsing state: i.e. environments within the text are counted as text, etc. Rules for the most common environments, at least those that require non-default treatment, should be predefined, but you may have to add more rules if you use environments defined in packages or by yourself. If you wish to be warned against any environment names you use that lack a defined rule, set the option \code{-strict}.
+
+Some macros are words by themselves: e.g. \code{\bs{LaTeX}}. These are counted as words provided the macro word rule has been defined for them, but you cannot expect \TeXcount{} to count something like \code{\bs{LaTeX}-word} or \code{\{\bs{TeX}\}count} as one word although the above explanation indicates that it should: \TeXcount{} will in both cases evaluate the macro and the following text separately and thus count them as separate entities. Since \TeXcount{} recognises \code{\bs{LaTeX}} and \code{\bs{TeX}} as single words, each of the two examples would end up being counted as two words.
+
+
+\subsection{What \TeXcount{} does not do}
+
+While an ideal solution should be able to expand the macro instructions, thus being able to handle new macros, that would at its worst require reimplementing much of \TeX{}, something that is clearly unrealistic. Instead, I have opted for a simpler solution: to define rules stating which parameters to count and which to ignore and allowing for such rules to be added easily. Thus, \TeXcount{} cannot handle macros that may take a variable number of parameters. Nor can it handle macros that take parameters on forms other than \code{\{parameter\}}. However, support has now been added for macro options on the form \code{[\ldots]} to be parsed.
+
+In general, while \TeXcount{} does the parsing in some detail, it does not do it exactly as \TeX{} does it. In some respects there may therefore be critical differences: e.g. while \TeX{} reads one character at a time, \TeXcount{} reads one word at a time, so while \LaTeX{} would interpret \code{\bs{cite} me} as \code{\bs{}cite\{m\}e}, \TeXcount{} would interpret it like \code{\bs{cite}\{me\}}.
+
+Another issue is that, since \TeXcount{} does not know how to expand macros, it cannot handle macros like \code{\bs{maketitle}} that add text to the document. With respect to \code{\bs{maketitle}}, I have instead set the rule for \code{\bs{title}\{title text\}} to count this as a header although it does not itself produce any text.
+
+
+\subsection{Problems to be aware of}
+
+In most large documents, there will be cases where \TeXcount{} does not give an exact count. Reasons may be macros \TeXcount{} does not recognise, words that \TeXcount{} split in two (or more) because of special characters not recognised as letters, or options and parameters not counted which actually produce text. Some problems may also arise because it is not always clear what should be counted and \TeXcount{} implements one particular choice: counting numbers as letters/words, not counting formulae as words, not to count tables as text, etc. However, hopefully these should either consist of individual, infrequent errors which should have limited effect on the total count, or entire regions that are included or excluded for which the user may change the parsing rule to produce the desired count.
+
+There are, however, problems that may arise which are more fundamental and result in counts which are simply wrong rather than just inaccurate, or even make \TeXcount{} fail entirely.
+
+If \TeXcount{} fails to detect environment endings properly, either closing \code{\{} or \code{\bs{end}}, it may end up ignoring major parts of the document. This should normally produce errors of some kind, although there may be cases when no errors are produced. However, by looking at the verbose output, it will be very clear that entire parts of the document have been excluded. Such problems may be caused by macros that allow unmatched group delimiters, and some effort has been made to minimise the risk of this at the cost of risking other but less critical errors: e.g. there are limits to what is permitted as macro options in order to ensure that a single unmatched \code{[} does not cause large parts of the document to be interpreted as a big option.
+
+For users of languages containing letters other than the Latin letters A to Z, there is a risk that \TeXcount{} may have difficulty identifying words correctly. The script relies on Perl to recognise words as sequences of letters, and must therefore know which characters are considered to be letters. Words containing letters not recognised by \TeXcount{} will tend to be split into two or more words, which can dramatically inflate the word count. The first step is to ensure that the file is read using the correct encoding: I generally suggest using the UTF-8 Unicode encoding, and from version 2.3 this is the default encoding used by \TeXcount{}, although other encodings may also be used. Unicode has good annotation of which characters are letters, and starting with version 2.3, \TeXcount{} uses Unicode internally to represent the text.
+
+While non-Latin letters like \code{\aa} and \code{\"a} should be recognised as letters, \TeX/\LaTeX{} codes using macros or special characters, such as \code{\bs{aa}} and \code{\bs{"}a}, are not immediately understood as letters. I have added patterns aimed at recognising these as well, but depending on the code you are writing, these patterns may either not be flexible enough to recognise all letter codes, or may be too flexible and recognise things it should not. I have added a relaxed mode (\code{-relaxed}) and a more restricted mode (\code{-restricted}) in which these patterns are more general or more constrained, but you should check how this performs on your actual texts by viewing the verbose output.
+
+
+
+% ---------------------------------------------------------------------------
+
+\section{Syntax and options}
+
+
+\subsection{Running \TeXcount{}}
+
+The command to run \TeXcount{} may vary slightly depending on the operating system and the local settings. You may also wish to rename it or define an alias.
+
+Under Windows, running \code{texcount.pl} from the command line suffices if \code{texcount.pl} is in the path and pl-files are defined to run as Perl scripts.
+
+Under Linux/Unix, it should be sufficient to run \code{texcount.pl} provided it is in the PATH and has been made executable (\code{chmod u+x texcount.pl}). The first line of the file contains the line \code{\#!/usr/bin/env perl} which should find the correct location for \code{perl} (provided the program \code{/usr/bin/env} is available). If not, run \code{which perl} to locate Perl and replace the first line of the script with \code{\#!\textit{path}}.
+
+Alternatively, if the above methods do not work, you may have to run \TeXcount{} explicitly in Perl by executing \code{perl texcount.pl}. You then need to have the \code{perl} executable in the path or give the explicit path.
+
+\subsection{\TeXcount{} command-line options}
+
+For simplicity, I will simply write \code{texcount.pl} in this manual for the code to execute the script. The syntax then becomes
+\codeline{texcount.pl \textit{[options] [files]}}
+where the options may be amongst the following:
+
+\input{sub_options}
+
+If more than one file is given, \TeXcount{} will perform the count on each of them printing the results for each, then print the total sum at the end. Note that files are parsed one by one in order of appearance and counts made per file; only afterwards are the totals computed. 
+
+\subsubsection{Option handling alternatives and modifications}
+
+In some cases, e.g. when running from within a script, the option syntax may cause problems. Two minor modifications have been added.
+
+For set options on the form \code{-[option]=[value]}, you may use the alternative syntax \code{-[option]:[value]} to avoid any problems caused by the equal character.
+
+\TeXcount{} will ignore any option starting with \code{@-} which can either be used to temporarily exclude an option in editing an existing call to \TeXcount{}, or to use the \code{@-[option]} to pass options to scripts without having to remove these before passing the arguments on to \TeXcount{}.
+
+
+\subsection{File encoding}
+
+If your \TeX/\LaTeX{} document consists entirely of ASCII characters, there should be no problems with file encoding. However, if it contains non-ASCII characters, e.g. non-Latin letters such as \o{}, there are different ways in which these may be encoded in the files.
+
+The main encoding supported by \TeXcount{} is UTF-8 (Unicode), and this is used to represent text internally in \TeXcount{}. In older versions of \TeXcount{}, Latin-1 (ISO-8859-1) was the default encoding, but this may cause problems when using non-Latin characters.%
+\footnote{%
+In Perl, which \TeXcount{} is written in, Latin-1 is the default. However, starting with version 2.3, \TeXcount{} has switched to using UTF-8 (Unicode) internally and will convert text to Unicode before processing: in older versions, internal representation was UTF-8 or Latin-1 depending on the options used.}
+Both of these are compatible with ASCII: i.e. both are extensions of ASCII, so ASCII characters will be treated correctly by both encodings, but non-ASCII characters will be treated differently.
+
+From version 2.3 of \TeXcount{}, it is possible to specify other encodings using the \code{-encoding=} option. If no encoding is specified, \TeXcount{} will guess which encoding is used. By default, this guessing is limited to ASCII, UTF-8 and Latin-1. If other encodings are used, the automatic guessing is likely to pick Latin-1 since most files would result in valid Latin-1 code. If the \code{-chinese} or \code{-japanese} option is set, it will guess at other encodings, but still with UTF-8 as the first choice.
+
+I generally recommend using UTF-8 Unicode: this is increasingly being the new standard. Basically, Unicode contains the characters needed for all existing languages, enumerated from 0 and upwards (beyond 100000), which resolves the problem of requiring different character sets. Since there are more than 256 characters in Unicode, Unicode cannot be represented using one byte per character: UTF-8 is a way to encode the Unicode characters into a list of bytes so that ASCII characters (no. 0--127) are represented by one byte (same as in ASCII), while non-ASCII characters are represented using two or more bytes. Unicode may also be encoded using two bytes to represent each of Unicode characters 0--65535, which covers most of practical use, but this is less commonly used as a file format: it is, however, common for internal representation of strings in memory, as done by e.g. Java, so Perl is the odd one out in using UTF-8 for internal string representation.
+
+If an encoding is specified using the \code{-encoding=} option, the input will be decoded from the specified encoding into UTF-8. If HTML output is specified, the output will be UTF-8. This ensures that all HTML produced is UTF-8, which is also the encoding specified in the HTML header. If text output is used, the specified encoding is used for the output. E.g. if you specify \code{-encoding=latin1}, \TeXcount{} will assume that all files are encoded in Latin-1, and will also produce the detailed output using Latin-1. For piping, i.e. option \code{-}, this is useful as it ensures the output has the same encoding as the input.
+
+For convenience, if no encoding is specified, \TeXcount{} will try to guess which encoding is the appropriate one. This is done simply by checking a specified list of encodings one by one until one is found that fits the text. The default is to check ASCII, then UTF-8, and finally Latin-1. If none fits, \TeXcount{} should try to decode the ASCII part of the text replacing non-ASCII characters with a wildcard character, although there may be cases when the decoding exits upon hitting an error. If Chinese or Japanese languages are specified, UTF-8 is tried first, then other encodings are checked depending on the language.
+
+Note that if no encoding is specified and \TeXcount{} is left to guess the appropriate encoding, all output will be UTF-8. Thus, letting \TeXcount{} guess the encoding may not be suitable when using \TeXcount{} in a pipe since the UTF-8 output may not be compatible with the encoding of the input. If multiple files are parsed, \TeXcount{} will guess the encoding separately for each file even if they are included (\code{-inc} or \code{-merge}) in a file with an identified encoding, and may thus end up selecting different encodings for different files.
+
+
+\subsection{Language scripts, alphabets and character sets}
+
+In addition to the traditional Latin letters, A-Z, a number of letters are recognised by Unicode as part of the extension of the Latin letters. Some languages, however, use entirely different character sets.
+
+By default, \TeXcount{} has been set up to recognise all alphabets. However, there is a distinction between alphabets like the Latin, Greek, Cyrillic, etc. in which words consist of multiple letters, and languages like Chinese in which each character should be counted as a word. For simplicity, we refer to these as \emph{alphabetic} characters and \emph{logograms}.\footnote{%
+Actually, these names are not completely accurate. A logogram is a script which represents a word or `meaningful unit', but e.g. the Japanese kana and Korean hangul are counted as words although they represent sound or syllables rather than meanings.}
+The options \code{-alphabets=} and \code{-logograms=} (or \code{-alpha=} and \code{-logo=} for short) allow you to specify which characters to use as either alphabetic letters or whole word characters. These take values that consist of Unicode properties separated by \code{,} or \code{+}. The default setting corresponds to
+\codeline{-alphabets=Digit,alphabetic}
+in which \code{alphabetic} is defined by \TeXcount{} as the Unicode \code{Alphabetic} class minus logographic script classes, and
+\codeline{-logograms=Ideographic,Hiragana,Katakana,Thai,Lao}
+which should cover Chinese characters (\code{Han}) as well as the Japanese characters (\code{Han} for the kanji, \code{Hiragana} and \code{Katakana} for the kana). Both options remove previous script settings, unless the list is prefixed by \code{+} in which case the scripts are added: e.g. \code{-logograms=+cjkpunctuation} will add the CJK punctuation characters (defined by \TeXcount) to the set of counted characters.
+
+Applicable Unicode properties/scripts include \code{Digit}, \code{Latin}, \code{Greek}, \code{Cyrillic}, \code{Hebrew}, \code{Arabic}, \code{Han}, \code{Katakana}, \code{Hiragana}, and more.\footnote{A more complete overview is available at Wikipedia: \url{http://en.wikipedia.org/wiki/Script_(Unicode)}.}
+
+In addition to the Unicode properties, \TeXcount{} has added a few additional character groups. The properties \code{alphabetic}, \code{digit} and \code{alphanumeric} are more restrictive than their Unicode name-sakes: \code{alphabetic} excludes the default logographic character sets, and \code{digit} consists only of 0--9 unlike Unicode \code{Digit} which includes numerals from other scripts. There is also \code{cjkpunctuation} which is intended to identify Chinese/Japanese/Korean punctuation.
+
+Note that the Unicode properties are case sensitive. The native Unicode properties start with capital letters, whereas the properties defined by \TeXcount{} are all lower case. Invalid properties will be ignored.
+
+The options \code{-chinese} and \code{-japanese} still exist and simply restrict the logographic character sets. In addition, \code{-chinese-only} and \code{-japanese-only} will exclude alphabetic words from the counting, equivalent to \code{-alphabets=} with no script properties given. In addition, these options will change the lists of file encodings \TeXcount{} will try if no encoding is given.
+
+The option \code{-stat} has been added to produce overall word counts per script type. This uses the character classes specified in the \code{-alphabets=} and \code{-logograms=} options, so the default will be able to count which words are purely alphabetic and which contain numbers (or a combination of both), but will not distinguish between e.g. Latin and Greek. To do that, you would have to specify the script classes: e.g.
+\codeline{-alphabets=digit,Latin,Greek,Cyrillic}
+will count words containing the numbers 0--9, Latin letters (including the extended Latin character set), Greek letters and Cyrillic letters. Words may contain any combination of these: \TeXcount{} does not require that a word consist of only one type of script. Also, note that if \code{digit} had not been included, numbers would not be allowed to be part of or counted as words. The output statistics will then give the number of words containing each of these script classes (or combination).
+
+
+\subsection{Parsing details}
+
+By selecting one of the \code{-v} options, you can choose how much detail is printed. This is useful for checking what \TeXcount{} counts. Alternatives \code{-v0} to \code{-v4} control the amount of detail, with \code{-v} equal to \code{-v3}. The option \code{-showstate} shows the internal state and is for debugging purposes only: \code{-v4} switches this on.
+
+The output is colour coded with counted text coloured blue, other colours for other contexts. The colour coding is made using ANSI colour codes. These should work when printed directly to a Linux xterm window, but need not work if piped through \code{more} or \code{less}: with \code{less} you need to use the option \code{-r} for the colours to be shown correctly.
+
+Under Windows or other operating systems, regular ANSI colours don't work, but there is a fix in place which adapts it for Windows, although this may not function exactly as desired.
+
+In general, however, I recommend using HTML output which can be viewed in a browser: in particular if the text output does not produce suitable colour coding.
+
+To print the details encoded as an HTML document, use the option \code{-html}. Alternatively, \code{-htmlcore} only outputs the HTML body. I suggest using the options \code{-html -v} to get full detail, save this to an HTML file, e.g. using
+\codeline{texcount.pl -html -v -sum \textit{files} > details.html}
+where \code{-sum} computes the total count of words and formulae (or \code{-sum=1,1,1} to only count words) and adds the cumulative count at the end of each line of the parsing details, and \code{-sub} is on by default which produces subcounts per section.
+
+\subsubsection{Control of details in verbose output}
+
+The verbosity option, \code{-v=\parm{styles-list}} or \code{-v\alt{0-4}\parm{styles-list}}, can be used to select exactly which elements to include or exclude from the verbose output. The styles list consists of a list of styles or style categories with \code{+\parm{style}} or \code{-\parm{style}} used to indicate if they should be added or removed. If the first style in the list is one of the categories 0 to 4, the \code{=} can be dropped. The option \code{-help-style} returns an overview of the available styles and style categories, while \code{-help-style=\parm{style}} may be used to get a description of a particular style or style category.
+
+Each token in the verbose output has a defined style: e.g. \code{word}, \code{hword} (header word), \code{ignore}, \code{option}, etc. If the style is included in the styles list, it will be printed in the verbose output; if not included in the styles list, it will not be printed. Thus, by setting which styles are included in the styles list, you can specify in detail which tokens are included in the verbose output. The included styles correspond to the list of colour codes listed at the start of the output when \code{-codes} is set.
+
+The style categories, which include \code{0} to \code{4}, are groups of related styles: e.g. \code{Words}, \code{Macros}, \code{Options}, etc. Note that apart from \code{0} to \code{4}, the style categories have capital initials, while the styles themselves are all lower case.
+
+For example, if you only want to output words (including those in headers and other contexts), you can set the option \code{-v=Words}; using \code{-v=Words+math}, the equation contents will be included (but not the enclosing \code{\$\ldots\$}).
+
+
+\subsection{Summary information}
+
+By default, \TeXcount{} outputs counts of text words, header words, caption words, number of headers, number of floats/figures, number of inlined formulae, and number of displayed formulae, and lists each of these counts. To shorten this to a one-line format per file, specify \code{-brief}.
+
+To get \TeXcount{} to produce a total count, specify \code{-sum}: this will compute the sum of all words plus the number of formulae. A customized sum may be computed by specifying \code{-sum=n,n,\ldots} with up to seven numbers separated by commas giving the weight (0=don't count, 1=count once) of each of the seven counts: e.g. the default is equivalent to \code{-sum=1,1,1,0,0,1,1}. To count words only, use \code{-sum=1,1,1}. Higher weights may also be used, e.g. to count displayed formulae or floats/figures as a given number of words.
+
+Specifying \code{-sum} has two main effects: the cumulative sum is added to the output in verbose formats, and the sum is added to the summary. If combined with \code{-brief}, the option \code{-total} is automatically set, resulting in a one line output containing only the total sum.
+
+For adding subcounts e.g. by sections, the option \code{-sub} (or \code{-subcount}) may be used. By default, this produces subcounts by part, chapter, section and subsection which are listed in a brief format. One may, however, specify \code{-sub=} followed by \code{part}, \code{chapter}, \code{section}, or \code{subsection} (default when given without value). Break points which initiate a new subcount may also be specified within the \LaTeX{} document using \code{\%TC:break name}.
+
+If included files are included in the count (\code{-inc}), counts per file will be produced followed by a total count. Note that the counts for the included files are not included in the counts for the main document, and in particular are not included in the subcounts (e.g. per section). To suppress per file counts, the option \code{-total} may be used.
+
+By adding the option \code{-freq}, \TeXcount{} will output the word frequencies in order of descending frequency: this is only done for the total count, not per file. You may restrict the frequency table to words occurring at least $n$ times by specifying \code{-freq=\it{n}}. \TeXcount{} will count words irrespective of case, but the output will retain upper case where this is consistently used. Note that \TeXcount{} may not recognise that words are the same if they are written differently in the code, e.g. \code{{U}pper} and \code{Upper}.
+
+A frequency table for each script type (alphabetic, Han, etc. or script classes like Greek, Hebrew etc. if specified in \code{-alphabets=}) is produced by the option \code{-stat}. 
+
+
+\subsection{Parsing options}
+
+\TeXcount{} uses regular expressions to identify words and macro options. By default, these have been set so as to fit most common usages. However, some users may find the default to be too strict, e.g. not recognise options that are long and contain less common symbols. More permissive patterns may be selected by using the option \code{-relaxed}. This allows more general document elements to be identified as words or macro options, which may sometimes be desired, but may also have undesirable effects, so check the verbose output to verify that \TeXcount{} has counted the appropriate elements. Conversely, if the default settings tend to combine words that should be counted as separate words, you may try the option \code{-restricted}. 
+
+Macro options, i.e. \code{[\ldots]} after macros and macro parameters, are ignored. Since \TeXcount{} has no specific knowledge of which macros take options, this is a general rule applied to all macros that take parameters\footnote{For macros that take no parameters, \code{[\ldots]} is not interpreted as a macro option. While slightly inconsistent, this avoids e.g. \code{\{\bs{bf}[bold text]\}} being gobbled up as a macro option and ignored}. In order to avoid that uses of [\ldots] that are not macro options are mistaken as such, \TeXcount{} makes some restrictions on what may be contained in such an option. By default, this restriction is relatively strict under the assumption that it is better to count a few macro options as words than risk large fragments of text being ignored. However, if your document contains macro options with more complicated values (e.g. certain special characters or macros), using \code{-relaxed} may help handle these correctly.
+
+By default, \TeXcount{} does not allow special characters or macros to be part of words. This may cause problems if character modifiers or some special characters are used which are entered as macros. The \code{-relaxed} option makes the word recognition regular expression somewhat more general.
+
+
+\subsection{File inclusion}
+
+By specifying \code{-inc} or \code{-merge}, \TeXcount{} will automatically count documents that are included using \code{\bs{input}} or \code{\bs{include}}. The difference between the two is that \code{-inc} analyses the included files separately, while \code{-merge} merges the included documents into the parent document. Thus, \code{-inc} will result in one count per file and a total sum at the end, while \code{-merge} will treat the merged document as if it was one file.
+
+The default option is \code{-noinc} indicating that included documents are not counted.
+
+Paths can be absolute or relative. Relative paths are by default relative to the working directory, although e.g. the \code{import} package can cause files to be included from other directories. The working directory is by default set to be the current directory: i.e. the directory from which \TeXcount{} is executed. This default behaviour corresponds to the option \code{-dir=.}.
+
+The working directory can be specified explicitly by the \code{-dir=\parm{path}} option. The file names on the command line should still be relative to the current directory, i.e. the one from which \TeXcount{} is executed, while files included within these will be relative to the specified working directory.
+
+Alternatively, if \code{-dir} is used without setting a path, the working directory is determined by the directory containing the top level \LaTeX{} documents, i.e. the document specified on the command line; if several files are provided on the command line, these may result in different working directories. Note that \code{-dir} and \code{-dir=} are fundamentally different: the first indicates that the working directory is determined by the top level \LaTeX{} documents, while the second fixes the working directory to be the current directory.
+
+Note that when included documents are parsed as separate files, i.e. using \code{-inc}, the text of included documents is not included where the \code{\bs{input}} or \code{\bs{include}} is located. This has two consequences. First, since word counts are produced per file, subcounts, e.g. by chapter, will only include the text in the same file, not that of the included file. Secondly, if TC-instructions to \TeXcount{} are embedded in the \LaTeX{} document, e.g. defining additional macro handling rules, these take effect in the order they are parsed by \TeXcount{}. Since included documents are parsed after the parent document, definitions in the parent document will be in effect for the included documents; definitions made in the included documents will only be in effect for subsequently included documents, not in the parent or previously included documents.
+
+In addition to the \code{-dir} option for setting the working directory, there is a similar option \code{-auxdir} for setting the path to the auxiliary directory where e.g. the bibliography \code{.bbl} file should be read from. The default setting is \code{-auxdir} which means that the working directory is used. However, \code{-auxdir=\parm{path}} can be used to overrule this and set an alternative path. If \code{-dir=\parm{path}} is used, the auxiliary path should be relative to the current directory, not to the working directory specified with \code{-dir=\parm{path}}; if \code{-dir} is used, as is the default, the working directory will be the directory containing the top level \LaTeX{} documents (the ones specified on the command line), and the auxiliary path will be relative to this, unless an absolute path is specified.
+
+
+
+% ---------------------------------------------------------------------------
+
+\section{Macro handling rules}
+
+A few special macro handling rules are hard-coded into the \TeXcount{} script: i.e. the handling of those can only be changed by editing the script. However, \TeXcount{} primarily relies on a few general rules and macro and environment handling rules that follow a specific pattern.
+
+
+\subsection{General macro handling rules}
+
+The general macro handling rules fall into a few general categories:
+
+\begin{description}
+
+\item[Macro]In its simplest form, this type of rule just tells how many parameters to ignore following the macro. More generally, one may specify the number of parameters a macro takes and how each of these should be handled. Options enclosed in \code{[]} before, between and after parameters are also ignored; this also applies to macros not specified here, so for a macro with no rule, immediately following \code{[]}-options will be ignored. (This type of rule was called an exclude rule in older versions of \TeXcount{}, the reason being that the rule originally only gave the number of
+parameters to ignore following a given macro.)
+
+\item[Environment]For environments enclosed by \code{\bs{begin}\{\textit{name}\}} and \code{\bs{end}\{\textit{name}\}}, there are rules specifying how the contents should be interpreted. A macro rule is added for \code{begin\textit{name}} (without the backslash!) which is \TeXcount{}'s internal representation of \code{\bs{begin}\{\textit{name}\}}. Note that special characters like \code{*} may be part of the environment name, e.g. as in \code{equation*} and rules for these need be specified\footnote{Previously, trailing \code{*} was supposed to be ignored so the same rule would apply to environment \code{equation*} as to \code{equation}. However, due to a bug in a regular expression, this did not work as intended and I have decided not to follow that strategy and instead specify these rules explicitly.}. \emph{Previously, environment rules were referred to as 'group rules', but I have now renamed this both in the \TeXcount{} script and documentation, and replaced \code{group} by \code{envir} wherever appropriate.}
+
+\item[Macroword]This type of rule indicates that the macro itself represents one or more words. Initially, \code{\bs{LaTeX}} and \code{\bs{TeX}} are defined with values 1 indicating that each represents one word.
+
+\item[Preamble]A few macros should be counted even if they are in the preamble. In particular, \code{\bs{title}\{title text\}} is counted as a header assuming it will later be used to produce a title.
+
+\item[Float inclusion]Within floats (environments with the \code{float} parsing rule) there may be texts that should still be counted: in particular captions. These are specified with the float inclusion rule.
+
+\end{description}
+
+Previously, there was also a separate header handling rule, but this is now incorporated into the more general macro handling rules.
+
+A macro parameter is normally on the form \code{\{\textit{something}\}}; more generally it may be anything \TeXcount{} parses as a single unit (or token), e.g. a macro, but since \TeXcount{} parses word by word rather than character by character this may not always be correct if parameters are not \code{\{\}}-enclosed or macros. In addition, some macros take optional parameters which are usually on the form \code{[\textit{option}]}, and \TeXcount{} can also (from version 4) count these.
+
+
+\subsection{Special macro handling rules}
+
+Some macros do not follow the pattern used by \TeXcount{} to represent macro handling rules. For some of these, special handling rules have been hard-coded into the \TeXcount{} script. For some, the macro syntax differs from the general rule, while in other cases the macros may trigger special processing.
+
+\begin{description}
+
+\item[file include]If \code{-inc} is specified, included files will also be parsed and the total presented at the end. Initially, \code{\bs{input}} and \code{\bs{include}} trigger file inclusion, but more file inclusion macros may be added to the \code{\%TeXfileinclude} hash. In addition to potentially triggering file inclusion, the syntax may differ in that \code{input} does not require the file name to be enclosed in \code{\{\ldots\}}.
+
+\item[package include]When packages are included using \code{\bs{usepackage}\{name\}}, \TeXcount{} will check for package specific macro handling rules to include. Initially, only \code{\bs{usepackage}} triggers package inclusion, but more macros may be added to the \code{\%TeXpackageinc} hash.
+
+\end{description}
+
+Complete \LaTeX{} documents should start with a \code{\bs{documentclass}} specification, then a preamble region which should not contain typeset text, before the main document starts with \code{\bs{begin}\{document\}}. However, \LaTeX{} files which are meant to be included into a document will not contain \code{\bs{documentclass}} and \code{\bs{begin}\{document\}}. A rule to recognise the preamble region is hard-coded into \TeXcount{}.
+
+Rules for identifying \code{\$\ldots\$}, \code{\$\$\ldots\$\$}, \code{\bs(\ldots\bs)}, and \code{\bs[\ldots\bs]} as formulae are hard-coded and basically parse until the closing token is encountered.
+
+The macros \code{\bs{def}} and \code{\bs{verb}} have hard-coded rules since these do not follow the pattern for macro handling rules, but may contain \LaTeX{} code which could seriously disrupt the parsing, e.g. by containing unclosed \code{\bs{begin}}. Macros like \code{\bs{newcommand}}, however, are handled by ordinary macro rules.
+
+The macro \code{\bs{bibliography}} is handled to check if the bibliography file should be parsed. The \code{thebibliography} environment is also handled differently, one difference being that a bibliography header is added to the count.
+
+
+\subsection{Package specific macro handling rules}
+
+Starting with version 2.3, \TeXcount{} can handle different sets of macro handling rules for different packages. When a package is included in the \LaTeX{} code or through the \code{-incpackage} option, rules defined for the given package are added.
+
+Note that \TeXcount{} is still doing the analyses sequentially. It is therefore critical that the package inclusion takes place before any use of the package which may make a difference if you are analysing several files. E.g. if the main file contains \code{\bs{input} setup}, any packages included in \code{setup.tex} will not apply to the main file since this is parsed before \TeXcount{} parses \code{setup.tex}.
+
+As of now, the package support is sparse since most macro handling rules have been included in the main set of rules.
+
+
+\subsection{Bibliography handling}
+
+By default, the bibliography is not included in the word count. If the \code{-incbib} option is specified, however, bibliography parsing is turned on. If the bibliography is included from the \code{bbl} file using the \code{\bs{bibliography}} macro, this will be parsed as if included with the \code{-inc} option. If \code{-merge} is specified together with \code{-incbib}, the bibliography will be merged into the document.
+
+Note that bibliography parsing may be non-trivial and depend on the bibliography style used, so the verbose output should be checked: some styles perform considerable formatting which may confuse \TeXcount{}. In addition, initials, page numbers, etc. will all be counted as words, which may result in a word count which is higher than intended.
+
+
+\subsection{Adding or modifying macro handling rules}
+
+There are basically two different ways in which you can add additional macro handling rules, e.g. for your own macros, or modify existing rules: by modifying the \TeXcount{} script, or by adding the rules through \TeXcount{} instructions embedded in the \LaTeX{} code.
+
+The simplest method is to use \TeXcount{} instructions which are embedded in your \LaTeX{} document as \LaTeX{} comments on the format \code{\%TC:\textit{instruction}}. This approach is described in some detail in section \ref{subsec:TC_addrule}.
+
+It is also possible to modify the \TeXcount{} code. The macro handling rules are mostly defined in the hash tables named \code{TeXmacro}, \code{TeXenvir}, etc., and editing these definitions is simple and does not require in-depth knowledge of Perl. A brief overview of the \TeXcount{} code is provided in section \ref{sec:code}. 
+
+
+\subsection{Cautions!}
+
+Since the rules are of a relatively general nature, macros that have a great deal of flexibility are hard to deal with. In particular this applies to macros with a variable number of parameters or where the handling of the parameters is not constant.
+
+By default, \TeXcount{} assumes that macro options, i.e. parameters on the form \code{[\ldots]}, should not be counted. From version 4.0, \TeXcount{} allows rules for optional parameters, but in most cases where optional parameters have not been specified in the macro handling rules, they will simply be ignored. There is some risk of misinterpreting text as an option: e.g. \code{\bs{bf}[text]}. This is not likely to be a frequent problem. However, if something like \code{\bs{bf}[a lot of text]} gets ignored because it is considered an option, it can influence the word count substantially. I have therefore been somewhat restrictive with what (and how much) may go into an option. The default restriction on what may be allowed as an option may sometimes be too restrictive, causing \TeXcount{} to interpret options as text or macro parameters; you may use the command line option \code{-relaxed} to relax this restriction and allow more general options.
+
+More advanced macros are not supported and can potentially confuse \TeXcount{}. In particular, if you define macros that contain unbalanced \code{\bs{begin}}--\code{\bs{end}}, this will cause problems as \TeXcount{} needs to keep track of these to know where environments start and end.
+
+
+
+% ---------------------------------------------------------------------------
+
+\section{Output from \TeXcount{}}
+
+\TeXcount{} will by default provide a summary of the word and element counts. This may, however, be modified either by specifying \code{-brief} which reduces it to a one line summary per file, \code{-total} to suppress per file summaries, or by providing an alternative template.
+
+If there are parsing errors, \TeXcount{} will print warnings about these. You may turn off this by specifying \code{-quiet} (\code{-q} for short), but there will still be an added comment about the number of errors in the final statistics to warn you of any errors.
+
+
+\subsection{Count statistics}
+
+The summary output will by default provide a summary of all counts: i.e. word counts for text, header and captions, and the number of floats/tables, headers, inlined and displayed formulae. You may combine these into a summary count by using the \code{-sum} option which by default gives the total number of words and formulae. You may choose briefer output formats by using the \code{-brief} option which produces a one-line summary of the counts. The option \code{-1} is the same as specifying \code{-brief -total} and will give only one line of output for the total only. Combining \code{-brief} with \code{-sum} will cause only the sum to be printed rather than the full set of counts.
+
+If multiple files are processed in one run, \TeXcount{} will by default provide summary statistics per file. If files are included (using the \code{-inc} option), summaries of all files are provided as well as the total. If there is more than one file, i.e. main \LaTeX{} documents provided in the command line, it will also write a total summary.
+
+In order to only write the total summary, use the option \code{-total}. If there is only one file processed, the result will be similar except that subcounts (counts per section etc.) are not provided with the total count.
+
+
+\subsection{Customising the summary output}
+
+You may specify an output template to use instead of the default output formats. This will replace the output per file or for the total with output produced using this template.
+
+The template is a string with codes for inserting the count values and titles. To specify it, use the option \code{-template="\textit{template}"}. The encapsulating \code{"\ldots"} are required if the template contains spaces. You may insert line shifts by using \code{\bs{n}}.
+
+The counts may be included by using the counter keywords: \code{word}, \code{headerword}, etc. Other codes that may be inserted are: \code{\{SUM\}} to insert the count as specified by the \code{-sum} option, \code{\{TITLE\}} for the title (e.g. section name) and a header (same as title unless \TeXcount{} has replaced it), \code{\{ERROR\}} for the number of parsing errors, \code{\{WARNINGS\}} for the number of distinct warnings, or \code{\{NWARNINGS\}} for the total number of warnings. Some of these also have shortened forms\footnote{Previously, one-letter versions of some of these codes were permitted, but that is no longer the case.} like \code{\{ERR\}} and \code{\{WARN\}}.
+
+Conditional inclusion may be performed using the format \code{\{\textit{label}?\ldots?\textit{label}\}} where \code{\textit{label}} is one of the counter keywords, \code{SUM}, \code{ERROR} or \code{TITLE} (or their alternative forms). The enclosed text will then be included only if the corresponding value exists and is non-zero. If you wish to include an alternative text when the value is non-existent or zero, use the format \code{\{\textit{label}?\textit{if non-zero}|\textit{if zero}?\textit{label}\}}.
+
+Subcounts, e.g. per section, may be included by using \code{\{SUB|\textit{template}|SUB\}} with a separate template text specified for the subcounts. This will only be included if there is more than one subcount, and in order to conditionally include prefix and suffix you may use \code{\{SUB?\textit{prefix}|\textit{template}|\textit{suffix}?SUB\}}.
+
+Note that you have to insert line shifts yourself. \TeXcount{} will only insert one line shift after each file count, and not after the total count: if you process only one file and want only to output the total sum without a line shift at the end, use \code{-sum -total -template="\{SUM\}"}, which should give the same output as \code{-1 -sum} when there are no parsing errors.
+
+
+
+% ---------------------------------------------------------------------------
+
+\section{\TeXcount{} instructions in the \LaTeX{} document}
+
+It is possible to give some instructions to \TeXcount{} from within the
+\LaTeX{} document. These can be used to control the parsing of the document and add custom made macro and environment handling rules directly from the \LaTeX{} document. The general format of these instructions is
+\codeline{\%TC:\textit{instruction \alt{parameters}}}
+which \LaTeX{} will interpret as a comment but \TeXcount{} will detect.
+
+Adding your own macro handling rules is relatively simple. While it is fairly easy to edit the script to add more rules, this has the disadvantage that the modifications will be lost if updating to a new version of \TeXcount. A better and more flexible solution is to include instructions to \TeXcount{} in the \LaTeX{} documents, alternatively to make a definition file in which new macro handling rules are defined. The \TeXcount{} instructions for doing this take the form
+\codeline{\%TC:\textit{instruction name parameters \alt{option}}}
+where in some rules \code{name} is the macro name (including backslash), and some rules use the environment name. Note that rules that take the macro name as an argument can (usually) be applied to \code{\bs begin\{\textit{name}\}} by specifying it as \code{begin\textit{name}} (no backslash) which is how \TeXcount{} represents \code{\bs begin\{\textit{name}\}} internally.
+%
+\input{sub_addrules}
+
+Note that macro handling rules are added successively throughout the session: i.e. if more files are parsed, handling rules from previously parsed files still apply. This has advantages as well as disadvantages. If you give a list of files with the rules specified in the first file, these rules will be applied to all the documents. However, if you use the \code{-inc} option, included files will be parsed only after \TeXcount{} has finished parsing the file in which they are included, so any rules specified in these will not apply to the initial document.
+
+A few additional \TeXcount{} instructions exist to control the overall parsing and counting:
+
+\input{sub_tc_other}
+
+In addition, in part with debugging in mind, the following \TeXcount{} instructions exist:
+
+\begin{description}
+
+\option[log \opt{text-line}]
+Writes a line of text to the verbose output. Counters may be included in the text using the \code{\{name\}} format.
+
+\option[assert \opt{counts} \opt{text-line}]
+This takes a list of counts separated by comma, and writes the text (which may contain counters on the format \code{\{name\}}) if the asserted counts do not match the actual counts.
+
+\end{description}
+
+These may be subject to change at some later point, although the functionality should remain.
+
+
+\subsection{Parameter and content handling rules}\label{subsec:TC_addrule}
+
+There is a set of alternative rules that may be used for parsing macro parameters and environment contents. These rules, or \emph{parser states}, are identified by keywords:
+%
+\input{sub_ruletypes}
+
+The keys are used to identify the rule. Environment content rules are simply specified by giving the desired key. Parameter rules are on the form \code{[\textit{rule},\textit{rule},\ldots]} with one rule provided per parameter, where each rule is either one of the above keywords or \code{option:\textit{rule}} to indicate an optional parameter on the form \code{[\ldots]}; alternatively, a single integer can be provided (not enclosed in \code{[]}) to indicate that the indicated number of parameters should be ignored.
+
+The list of parser states is in order of increasing priority: i.e. when a parser state is specified as a rule for parsing a parameter or environment content, this will only take effect if it has higher priority than the current state. Thus, text within an ignored or excluded region will not be counted.
+
+The formerly used numeric codes are listed at the end of the keyword list for each state. Before version 4, these numeric codes were used to specify parsing rules, but although these should still work, using the named keywords is highly recommended.
+
+The transitional states indicate an incrementation of one of the counters and then a change to another state: e.g. if the \code{header} rule is specified, this will first cause the header counter to be incremented, and then a change to the \code{headerword} state in which word counts are added to the header word counter. 
+
+\subsubsection{Adding a macro handling rule}
+
+The \TeXcount{} instruction for adding (or changing) a rule for how \TeXcount{} handles a specific macro takes the form
+\codeline{\%TC:macro \textit{macro-name parameter-rules}}
+where the macro name includes the backslash and the parameter rules can be an integer or a \code{[]}-enclosed list as explained above.
+
+If a list, \code{[\textit{rule},\textit{rule},\ldots]}, of parsing rules is provided, the macro will be assumed to take this number of parameters. Each rule is either a keyword signifying the rule, or parser state, with which the parameter will be parsed, or \code{option:\textit{key}} for optional \code{[]}-enclosed parameters. Additional \code{[]}-enclosed options, between or after the macro and the parameters, will be ignored.
+
+Macro handling rules specified for macros \code{\bs{\textit{name}}} automatically apply to \code{\bs{\textit{name}*}}: i.e. a \code{*} is automatically gobbled up as a macro modifier. 
+
+Here are some examples together with corresponding macro definitions:
+
+\begin{lstlisting}
+%TC:macro \refnote[text,othertext]
+\newcommand\refnote[2]{\textit{#1}\footnote{#2}}
+
+%TC:macro \newsection [header,ignore]
+\newcommand\newsection[2]{\section{#1}\label{sec:#2}}
+
+%TC:macro \NB 1
+\newcommand\NB[1]{\marginpar{#1}}
+\end{lstlisting}
+
+The predefined rules can easily be read off the script file: they are hash maps defined at the beginning of the script with names \code{TeXmacro}, \code{TeXenvir}, etc.
+
+\subsubsection{Adding an environment handling rule}
+
+Rules for environments may be added on the format
+\codeline{\%TC:envir \parm{name} \parm{parameter-rules} \parm{content-rule}}
+for parsing \code{\bs{begin}\{name\}\ldots\bs{end}\{name\}}. The parameter rules are specified as for the \code{macro} rule and are used to process the parameters that follow \code{\bs{begin}\{name\}}. The content rule is a single parsing rule to use on the environment content.
+
+\begin{lstlisting}
+%TC:envir theorem [] text
+\newtheorem{theorem}{Theorem}
+\end{lstlisting}
+
+\subsubsection{Adding rules that apply to the preamble and float contents}
+
+Within the preamble (from \code{\bs{documentclass}} to \code{\bs{begin}\{document\}}) and within floating objects (tables, figures, etc. parsed using the \code{float}/\code{isfloat} states), texts and macros are generally ignored. However, it is possible to specify particular macro handling rules that apply within these regions by using the \code{preambleinclude} and \code{floatinclude} \TeXcount{} instructions. These take the same format as the \code{macro} instruction:
+\codeline{\%TC:preambleinclude \textit{macro-name parameter-rules}}
+\codeline{\%TC:floatinclude \textit{macro-name parameter-rules}}
+It is possible for the same macro to specify different rules for preamble, floats and general use, although for most uses these should be expected to be the same.
+
+Preamble inclusion is typically used for macros like \code{\bs{title}} that define text that should be counted although it may be placed in the preamble. Another use is that macros that may occur in the preamble, like \code{\bs{newcommand}}, and may contain unbalanced \code{\bs{begin}}--\code{\bs{end}} pairs, require a stronger exclusion than the regular \code{ignore} rule even in the preamble to ensure \TeXcount{} is not confused by these.
+
+Float inclusion is used e.g. for captions, and the parsing rules should normally be to count texts using the \code{otherword} parsing rule.
+
+
+\subsection{Count macro, either as words or in other counters}
+
+Some macros, e.g. \code{\bs{LaTeX}}, generate words and should be counted as words. Other macros can generate other elements, e.g. headers or figures. Rules for counting macros can be specified as
+\codeline{\%TC:macroword \textit{macro} \textit{number}}
+where the parameter is the number of words produced by the macro, or
+\codeline{\%TC:macroword \textit{macro} [\textit{countername},\ldots]}
+which causes each of the counters in the list to be incremented by one (or more if listed multiple times).
+
+The counters for counting the number of files, text words, etc. are stored in an array. In some cases, e.g. when \code{-sum=} is specified, the order of the counters in this array is used to specify the rule. However, in most cases the counters should be specified by keywords. The counters, their index number and keywords are:
+%
+\begin{description}
+\def\option[#1]#2{\item[#1.] (keys: \code{#2})}
+\option[0]{file} Number of files.
+\option[1]{text, word, wd, w} Number of words in text.
+\option[2]{headerword, hword, hwd, hw} Number of words in headers.
+\option[3]{otherword, oword, owd, ow} Words outside text, e.g. in floats/tables/figures.
+\option[4]{header, heading, head} Number of headers.
+\option[5]{float, table, figure} Number of floating environments, e.g. tables and figures.
+\option[6]{inlinemath, inline, imath, eq} Number of inlined mathematics formulae.
+\option[7]{displaymath, dsmath, dmath, ds} Number of displayed equations.
+\end{description}
+
+Examples of uses:
+
+\begin{lstlisting}
+%TC:macroword \TeXcount 1
+\newcommand\TeXcount{{\TeX}count}
+
+%TC:macroword \acknowledge [header,hword]
+\newcommand\acknowledge{\section*{Acknowledgements}}
+\end{lstlisting}
+
+
+\subsection{Specifying file inclusion macros}
+
+In addition to \code{\bs{input}} and \code{\bs{include}}, which are the standard \LaTeX{} macros for file inclusion, there are packages such as \code{import} intended to enable organising files into subfolders. \TeXcount{}, from version~3, adds some support for macros that change the path from which files are included. If the user needs to add additional file inclusion macros, the format is
+\codeline{\%TC:fileinclude \parm{macro} \parm{file-parameters}}
+where the file parameters are a comma separated list of keywords, each corresponding to a macro parameter. Available parameters are:
+%
+\begin{description}
+
+\item[\code{input}:] This is a special keyword to use with \code{\bs{input}}. The handling of the parameter values is as \code{file}, but the parameter itself is not required to be enclosed in \code{\{\}}.
+
+\item[\code{file}:] This parameter simply gives the name of or path to a file. If the file is not found, \TeXcount{} will append \code{.tex} and try again.
+
+\item[\code{texfile}:] This parameter gives the name of or path to a file, but \code{.tex} will be appended, and is the rule used by \code{\bs{include}}.
+
+\item[\code{dir}:] This parameter provides the path of a directory relative to the \code{\$workdir}, and adds this to the search path before including any files. This is used with the \code{\bs{import}} macro of the \code{import} package.
+
+\item[\code{subdir}:] This parameter provides the path of a directory relative to the current directory, and adds this to the search path before including any files. This is used with the \code{\bs{subimport}} macro of the \code{import} package.
+
+\item[\code{<bbl>}:] This is a special keyword to use with \code{\bs{bibliography}} to specify inclusion of the bibliography file. It is different from the other keywords in that it does not take a macro parameter.
+
+\end{description}
+
+Examples showing how some existing macros, from basic \LaTeX{} and from the \code{import} package, are defined:
+
+\begin{lstlisting}
+%TC:fileinclude \input input
+\input macros.tex
+%TC:fileinclude \include texfile
+\include{intro}
+%TC:fileinclude \import dir,file
+\import{supplements/}{overview.tex}
+%TC:fileinclude \subimport subdir,file
+\subimport{tables/}{data.tex}
+\end{lstlisting}
+
+
+\subsection{Adding subcount break points}
+
+By specifying \code{-sub}, \TeXcount{} can produce subcounts, e.g. per section. Alternatively, or in addition, explicit break points can be entered in the \LaTeX{} document using the TC-instruction \code{break}. These take the form:
+\codeline{\%TC:break \textit{title}}
+A title (or name) may be given to identify the break point.
+
+If you define new section macros or macros you wish to cause a break point, these may be specified using the TC-instruction \code{breakmacro}:
+\codeline{\%TC:breakmacro \textit{macro} \textit{label}}
+This defines the given macro to cause a break point, and uses the given label to indicate the type of break (e.g. Section, Chapter, etc.).
+
+
+\subsection{Ignoring segments of the file}
+
+The TC-instruction \code{ignore}, later canceled by \code{endignore}, may be used to turn off all counting in a segment of the \LaTeX{} file. The ignored segment should thus be started by
+\codeline{\%TC:ignore}
+and ended by
+\codeline{\%TC:endignore}
+causing all text in between to be ignored.\footnote{In older versions, \TeXcount{} would still parse this text and might thus be affected by unbalanced braces. As of version 2.3, however, this should be fixed to make the ignore instruction more robust.}
+
+
+\subsection{Bibliography inclusion}
+
+In order to include the bibliography in the word counts, you can either specify \code{-incbib} on the command line, or use the \TeXcount{} instruction
+\codeline{\%TC:incbib}
+which has the same effect: it specifies handling rules for the \code{\bs{bibliography}} macro and \code{thebibliography} environment that cause the bibliography to be included in the count, and if necessary the \code{.bbl} file to be included (without requiring \code{-inc} or \code{-merge}).
+
+
+\subsection{Text substitution prior to parsing}
+
+There are cases where a macro needs to be substituted with a text prior to parsing. One such case is when a macro contains a file path which is later used by a file inclusion macro. Since \TeXcount{} does not actually expand the macros, it will not be able to generate the file path from the macro. Instead, one may perform an explicit substitution
+\codeline{\%TC:subst \parm{macro} \parm{text}}
+which will then cause all occurrences of the macro to be substituted by the provided text prior to parsing. Note that this substitution will therefore also be found in the verbose output. 
+
+\begin{lstlisting}
+\newcommand\chappath{chapters}
+%TC:subst \chappath chapters
+\input \chappath/chapter1
+\end{lstlisting}
+
+Note that the substitution is placed \emph{after} the \code{\bs{newcommand}} definition. Otherwise, the substitution would have taken effect, changing that line to \code{\bs{newcommand} chapters/chapters}.
+
+
+
+\subsection{Adding a new counter}
+
+Initially, \TeXcount{} has eight different counters: file, text words, header words, other words, number of headers, number of floating objects, number of inlined formulae, and number of displayed formulae. However, it is possible to add more counters, e.g. to count footnotes separately. The syntax is
+\codeline{\%TC:newcounter \parm{name} \opt{description}}
+where the given name is used as keyword to refer to this counter. If no description is provided, the name will be used as description. A new counter is then added, and a parsing rule (parser state) with the same name is added which may be used in specifying macro and environment handling rules.
+
+The following example shows how two different counters are added: one to count the number of footnotes, and another to count the words in footnotes.
+
+\begin{lstlisting}
+%TC:newcounter fwords Words in footnotes
+%TC:newcounter footnote Number of footnotes
+%TC:macro \footnote [fwords]
+%TC:macroword \footnote [footnote]
+Each footnote\footnote{Words in footnotes will be counted separately.} will be counted.
+\end{lstlisting}
+
+Note that we have to specify one rule for counting the words in footnotes, and another rule for counting the footnotes. Unlike headers and floating bodies, there are no transition states available that can do both.
+
+
+% ---------------------------------------------------------------------------
+
+\section{Using an option file}
+
+If you have a lot of settings, e.g. output template and TC commands for specifying parsing rules, you may place these into a file and include this using \code{-opt=\textit{file}}.
+
+The format of this file is quite simple: each line is read as one option, so different options should not be placed on the same line. If some options are so long you need to break the line, e.g. for specifying an output template, you can do so by placing \code{\bs{}} at the start of lines that continue the previous line.
+
+You may enter TC commands just as in the \LaTeX{} code by starting the line with \code{\%} instead of \code{\%TC:}. Using these, you may include specifications of parsing rules.
+
+Blank lines and lines starting with \code{\#} are ignored and may thus be used to add comments to the option file. So are leading spaces, which allows lines to be indented. Line breaks may be inserted by \code{\bs{n}}.
+
+Here is an example which sets the total sum to be the number of words (not including formulae), subcounts by section, parses included files, and adds an output template.
+
+\begin{lstlisting}[frame=single]
+### Options to use with TeXcount
+
+# Counting options
+-sum=1,1,1
+-sub=section
+-inc
+
+# Macro rules
+%macro \url 1
+%envir sourcecode 0 0
+%macroword \TeXcount 1
+
+# Path used in file inclusion (\chapterpath filename)
+%subst \chapterpath chap/
+
+# Output template
+-template=
+   \::: {title} :::\n
+   \Words: {sum}\n
+   \Formulae: {6} + {7}\n
+   \{5?Number of floats: {5}\n?5}
+   \{SUB? - {sum} words in {title}\n?SUB}
+\end{lstlisting}
+
+
+
+% ---------------------------------------------------------------------------
+
+\section{Customising \TeXcount{}}
+
+\TeXcount{} is a self-contained Perl script: no external packages or resources required except that you need to have Perl installed to run it. Unfortunately, as with much of Perl code since Perl does not itself encourage structured programming, after expanding somewhat in size, it is not the most readable of codes. However, there may still be cases where you might yourself want to modify the code.
+
+There are some things that may be modified quite easily even without knowing Perl.
+
+\begin{description}
+
+\item[Preset startup options]On one of the first lines of the code, the list \code{@StartupOptions} is defined. A list is simply a sequence of values (an array) on the form \code{(\textit{value},\textit{value},\ldots)}. As it stands, this list is empty, but you may add startup options to be included prior to command line options when you run \TeXcount{}. E.g. if you change this to \code{("-inc")} it will automatically add the \code{-inc} option so you don't have to do that yourself every time you run \TeXcount{}.
+
+\item[Adding macro handling rules]While you may add macro handling rules using \code{\%TC:} commands either in the document or in a separate option file, this is inconvenient for large numbers of macros or if you want these rules always to be included. Also, you might want to add such rules for specific packages. In either case, it might be practical to add these directly to the \TeXcount{} code. \TeXcount{} stores the rules in hashes (maps from a key to a value) named \code{\%TeXmacro}, \code{\%TeXenvir}, etc. There is more documentation on each of these in the code itself, and you may also inspect how rules have been defined for other macros and environments.
+
+\item[Output style]The ANSI colour codes for different levels of verbosity are encoded in the \code{\%STYLES} hashes and may be changed. The HTML style is encoded in the method \code{html_head()} and is easily modified. 
+
+\item[Character and word definitions]\TeXcount{} identifies words as those that match one of a given set of regular expressions (defined in \code{@WordPatterns}). Note that \code{@WordPatterns} is changed by options \code{-chinese}, \code{-japanese} and  \code{-letters}. The pattern that is used within the word patterns to recognise letters is stored in \code{\$LetterPattern}. This is replaced if the \code{-relaxed} or \code{-restricted} option is set. Changing these definitions may be useful if you have special characters or wish to define words differently.
+
+\end{description}
+
+
+% ---------------------------------------------------------------------------
+
+\section{Modifying the \TeXcount{} script}\label{sec:code}
+
+\TeXcount{} is written in Perl, and although hardly the best structured and documented code ever seen, I have tried to structure and document it somewhat. In particular, some parts of the code should be easily modifiable even without in-depth knowledge of Perl or the \TeXcount{} script: e.g. the macro handling rules.
+
+For more aid on how the \TeXcount{} script is coded and organised, please consult the Technical Documentation. However, here is a very brief overview:
+
+\begin{description}
+
+\item[Header and imports:] The shebang (\code{\#!}) and package imports (\code{use \parm{package}}).
+
+\item[Global variables (and some methods related to these):] This defines and initialises global variables related to option settings, state variables used in parsing and counting (including functions for interpreting these), variables and hashes for storing macro handling rules, and character class definitions (must be defined before use).
+
+\item[Main program:] This simply contains a call to the \code{MAIN} routine with the command line arguments.
+
+\item[Routines/functions/procedures:] The first procedure defined is \code{MAIN} which contains the program flow, then follows other subroutines. Routines with capitalised initial letters indicate high-level routines, while routines starting with underscores (\code{_}) are low-level routines.
+
+\item[Text data:] At the end of the file is a \code{__DATA__} region containing text data used by the help routines.
+
+\end{description}
+
+Perl will first process the setup section which defines global variables, arrays and hashes. It then executes the main section (consisting of the call to \code{MAIN}), whereafter it exits. The subroutines and text data follow after the \code{exit}.
+
+
+
+% ---------------------------------------------------------------------------
+
+\section{License}
+
+The \TeXcount{} package---script and accompanying documents---is distributed
+under the \LaTeX{} Project Public License (LPPL)
+\codeline{\url{http://www.latex-project.org/lppl.txt}}
+which grants you, the user, the right to use, modify and distribute
+the script. However, if the script is modified, you must change its
+name or use other technical means to avoid confusion with the original script.
+
+
+\end{document}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/TeXcount.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.pdf
===================================================================
(Binary files differ)

Index: trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.pdf
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.pdf	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.pdf	2017-09-19 22:01:25 UTC (rev 45338)

Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,619 @@
+\documentclass{article}
+\usepackage[T1]{fontenc}
+\usepackage{a4wide}
+\usepackage{listings}
+\usepackage{url}
+\usepackage{color}
+
+\include{macros}
+\newcommand\Obj[1]{\textsl{#1}}
+\newcommand\wild{\ldots}
+\newcommand\eqsim{$=\sim$}
+
+% CGI exclusion macro
+%\def\CGI#1\CGIend{} % To exclude CGI text
+\def\CGI#1\CGIend{#1} % To include CGI text
+\def\CGIend{}
+
+%%% Title
+\title{%
+\LARGE \TeXcount\\
+\Large Technical documentation\\
+\Large Version \version\copyrightfootnote
+}
+\author{Einar Andreas R{\o}dland}
+
+\sloppy
+
+\begin{document}
+
+\maketitle
+
+{\abstract%
+The aim of this document is to explain the implementation details of \TeXcount{} with the aim of aiding anyone who wishes to modify the Perl code (including the author). To be of practical use, it will require some knowledge of Perl and familiarity with \TeXcount{}: while full fledged development of \TeXcount{} requires a working knowledge of Perl, code modification may often be done with only limited experience with Perl.
+}
+
+{\scriptsize\tableofcontents}
+
+\pagebreak
+
+
+
+\section{Introduction}
+
+
+\subsection{\TeXcount{} versioning}
+
+The version number is on the form \code{\textit{major}.\textit{version}.\textit{subversion}.\textit{build}}. Main releases contain only the first two terms, implying that subversion and build number are both zero. Minor releases only contain the first three terms. Main as well as minor releases should be functional, tested versions. The subversion number can also be \code{alpha} ($\alpha=-2$) or \code{beta} ($\beta=-1$) for which the testing has been limited. The build number is used to keep track of changes and versions during development: they may be made available, but are purely for testing.
+
+
+\subsection{Some things you need to know about Perl}
+
+\TeXcount{} is written in Perl. The entire script, including macro rules and help texts, is contained in one file. This makes the file somewhat big, and modularisation of the code is therefore not strictly enforced. I have however tried to structure the code somewhat.
+
+Perl has a few built-in data structures which are referenced in somewhat different manners. In this document, it will be important to recognize the difference between three different types of data: regular variables, arrays and hash maps.
+
+\begin{description}
+
+\item[\code{\$\textit{name}=\textit{value}}:] The \code{\$} at the start indicates that it is a Perl variable. The value can be numbers or strings, or it can be a reference which points to another data object (e.g. array or hash map).
+
+\item[\code{@\textit{name}=(\textit{value},\ldots)}:] The \code{@} at the start indicates that this is an array. The positions are indexed from 0 to $\textrm{length}-1$.
+
+\item[\code{\%\textit{name}=(\textit{key}=>\textit{value},\ldots)}:] The \code{\%} at the start indicates that this is a hash map that maps keys to values. The key will usually be a string, but can also be a number.
+
+\item[\code{sub \textit{name}} \{\ldots\}:] This defines a subroutine: function or procedure. Normally, these can be defined anywhere in the script, and I have generally placed them after the main program.
+
+\end{description}
+
+Note that \code{(\textit{value},\ldots)} is a list of values, not an array or a hash: it simply produces a list of values that are used to fill the defined array or hash. Arrays and hashes can also be produced directly by \code{[\textit{value},\ldots]} or \code{\{\textit{key}=>\textit{value},\ldots\}} respectively. Both these return a reference to the array/hash rather than the array/hash itself.
+
+In much of the code, hashes are passed by reference: e.g. if \code{\%hash} is a hash, \code{\$href=\bs{\%hash}} stores a reference to the hash, where the leading \code{\bs{}} causes a reference to be returned rather than the hash itself. Retrieving a value from the hash is done by \code{\$hash\{\textit{key}\}} or \code{\$href->\{\textit{key}\}} if the hash is accessed by reference. Note that individual values in arrays and hashes are prefixed by \code{\$}: i.e. \code{\$\textit{array}[\textit{index}]} and \code{\$\textit{hash}\{\textit{key}\}}.
+
+\TeXcount{} makes extensive use of regular expressions (regex): expressions on the form \code{\$\textit{string}\eqsim/\textit{pattern}/} and \code{\$\textit{string}\eqsim s/\textit{pattern}/\textit{replace}/}. In \TeXcount{}, the main use is to recognize (and remove) tokens (words, macros, spaces, etc.) at the start of a string of \LaTeX{} code. Some of these may be fairly simple to understand, while others may be more complex.
+
+
+
+\section{Overview}
+
+
+\subsection{Code structure}
+
+\TeXcount{} is written in Perl, and although hardly the best structured and documented code ever seen, I have tried to structure and document it somewhat. In particular, some parts of the code have been written with modifications in mind so that users can make their own changes without in-depth knowledge of Perl or the \TeXcount{} script.
+
+Here's a quick walk-through of the code structure and comments on how easily the code may be modified. Some parts of the code are marked as \emph{CMD specific}. There are two versions of the script: the CMD version intended for command line use, and the CGI version used with the web interface. The one you have is the CMD version.
+
+\begin{description}
+
+\item[\em HEADER AND IMPORTS:] \textit{The shebang (\code{\#!}) and \code{use} imports.}
+
+\item[\em INITIAL SETUP:] \textit{These set up global variables prior to execution.}
+
+\item[Settings and setup variables:] The start of the script sets up initial settings and variables. Many of these may be modified by command line options, but if you want to change the default behaviour these may be changed. However, note that there is a list \code{@StartupOptions} intended for this: initially, it is empty, but this is probably the simplest place to change startup options.
+
+\item[Internal states:] As of version 2.3, internal state identifiers (which are numerical codes) have been defined as \code{STATE}, \code{TOKEN} and \code{CNT} variables, and these are also defined here. A few subroutines for interpreting these states have been included here, although most subroutines are defined after the main code, since they are intimately tied to the state's numerical values. None of these are intended to be modified.
+
+\item[Styles:] The style definitions basically define which elements to print for each of the verbosity levels. These map element names to ANSI colour codes. When used with HTML, the element names are used as tag classes. If you wish to change the ANSI colour scheme, or change which elements are written in each verbosity option, these may be changed.
+
+\item[Word pattern definitions:] This section contains regular expression patterns for identifying words and macro options. In addition, the additional character classes defined by \TeXcount{} are defined here. If you have special needs or wishes, modifying these definitions may be an option.
+
+\item[\TeXcount{} parsing rules:] This is the section in which the main rules for interpreting the \LaTeX{} code are specified: the exception is a few hard-coded rules that do not follow these general patterns. These are hashes that map the macro or environment name to the macro handling rules. First, the default rules are defined, then package specific rules are defined.
+
+\item[\em MAIN:] \textit{This is the top-level code which gets executed. All else is done through calls to subroutines.}
+
+\item[Main \TeXcount{} code:] This is the main code that is run. It is very simple: just a call to the method \code{MAIN} passing the command line options.
+
+\item[\em SUBROUTINES:] \textit{The subroutines are organised into blocks. Subroutine names use capital letters or initials if they are main routines (like public in other languages) to be used at the top-level, lower case if they may be used throughout but are considered to be lower-level subroutines, prefixed by one or two underscores (_) if used only within the block.}
+
+\item[Main routines:] The \code{MAIN} routine gives the general processing flow. This in turn calls routines to parse the command line options, process/apply the options, parse the \TeX/\LaTeX{} files, and finally summarise the final results. The main routines are CMD specific.
+
+\item[CMD specific subroutines:] These are subroutine versions that are CMD specific, e.g. file inclusion and ANSI colours. Their location is somewhat illogical: logically, they might belong later together with related subroutines, but have been placed this early because they are specific to the CMD (or CGI) version.
+
+\item[Option handling:] After parsing the options, the option values are processed using these subroutines. Some of the option handling operations call on global variables, whereas some are more hard-coded. Like the global variables, if you have special wishes or needs, there may be parts here that can be modified quite easily to change default settings or effects of specific options.
+
+\item[\TeX{} object:] The main role of the \code{TeX} object (which is technically not an object in the ordinary sense but just a hash) is to be a container object which links to the \TeX/\LaTeX{} code, the word count object, etc. The \code{TeX} object pertaining to any parsed \TeX/\LaTeX{} file is passed along from subroutine to subroutine, usually called \code{\$tex}. The \code{Main} object produced by \code{getMain} is a simple substitute for the \code{TeX} object for use when none is available, e.g. to catch errors not specific to any particular \code{TeX} object. 
+
+\item[File reading routines:] These are used to read files and STDIN.
+
+\item[Parsing routines:] These contain the main routines for parsing the \TeX/\LaTeX{} code. The main worker method is the \code{_{}parse_{}unit} which parses a block of code: the \emph{unit}. A unit of code may be the contents of an environment, a \code{\{\ldots\}} group, a macro option or parameter, etc. The parsing of one unit is determined by the parsing state, which is passed to the parsing method, and the end marker which indicates which token marks the end of the unit. Different subroutines are then used to process the different types of code: macros, environments, TC instructions, etc. Amongst these routines are also routines for converting the parsed code into tokens, which is done one token at a time which is then removed from the start of the code.
+
+\item[Count object and routines:] The count object contains the counters as an array, plus titles and labels; in addition it can contain a list of subcounts which are themselves count objects. The count object is used for each file, but also to summarise multiple files, and region counts within files (e.g. per section). The \code{TeX} object contains an active count object to which newly counted words, equations, etc. get added. However, each \code{TeX} object also has a summary count object which will contain the final sum. 
+
+\item[Output routines:] First, there are some routines for general output, i.e. independent of specific \code{TeX} objects. There are then some routines for formatting output, e.g. for the verbose output. There are also routines for printing count summaries in various formats. A special set of routines exist for printing the verbose output itself, and some of these are also involved in the parsing.
+
+\item[Help functions:] These routines are used to print help.
+
+\item[HTML functions:] These are routines for producing HTML output. In particular, the HTML style is defined here and may be easily modified.
+
+\item[Text data:] Some texts are not hard-coded into the script, but added as text data at the end. There are some routines defined to handle the text data, and then the text data itself.
+
+\end{description}
+
+Perl will first process the setup section which defines global variables, arrays and hashes. It then executes the main section (consisting of the call to \code{MAIN}), whereafter it exits. The subroutines and text data follow after the \code{exit}.
+
+\CGI
+There is a separate CGI version of the \TeXcount{} script for the web service. While this is mostly the same as the regular command line version, there are some differences in how options are set and \LaTeX{} documents are read. Occasional differences in the CGI version will be commented on, but the main emphasis will be on the command line version of \TeXcount{}.
+\CGIend
+
+
+\subsection{Global variables}
+
+A number of globally defined variables, including constants, arrays and hashes, are defined at the start of the program. These fall into a few different categories.
+
+There are a number of variables defined for storing options and settings, many of which can be modified by command line options. In addition, there are few variables for global summaries and statistics, as well as a few for internal states during parsing.
+
+Global constants are defined to represent different states and counters. One set of constants, \code{\$CNT_\wild}, specify the position of the different counters in the counting array; parser states are defined as \code{\$STATE_\wild}; token types are named \code{\$TOKEN_\wild}. For example, the parsing state \code{\$STATE_TEXT} indicates that a block of \LaTeX{} code should be parsed and have words counted as text words. The constants simply take numerical values, but help make the code more readable. Together with some of these are defined functions for interpreting or transforming these constants.
+
+Alternative settings for different options are defined in a number of hashes, e.g. \code{\%STYLES} indicating which tokens to print at different levels of verbosity, and \code{\%NamedLetterPattern} which stores alternative regex rules which may be used to recognize letters.
+
+A special set of global settings are the macro handling rules that are stored in a number of hashes: \code{\%TeXmacro}, \code{\%TeXenvir}, etc. as well as similar sets of hashes for package specific rules.
+
+
+\subsection{\TeXcount{} objects}
+
+First, note that what is referred to as objects here are just hash maps with a predefined set of values. However, these serve the same purposes as objects. There are no explicit class specifications defining these, just a set of functions returning hashes that contain the required keys, some of which may even be optional. Still, it is useful to think of them as objects, and their main purpose is to encapsulate data so that they can conveniently be passed around.
+
+\begin{description}
+
+\item[The \Obj{Main} object]
+Each \TeXcount{} session instantiates a singleton \Obj{Main} object. This is used as a replacement when no \Obj{TeXcode} object is available for capturing (counting and storing) error messages and warnings.
+
+\item[The \Obj{TeXcode} object]
+The \Obj{TeXcode} object encapsulates the \LaTeX{} code that is to be parsed as well as counts and lists of reported errors. In the code, it is generally referred to using the \code{\$tex} variable.
+
+\item[The \Obj{count} object]
+The \Obj{count} object is primarily a container for the array of counts: i.e. the array containing word counts and counts of headers, equations, etc. However, it also keeps track of subcounts from contained files, sections, etc.
+
+\end{description}
+
+A more detailed explanation of the different objects is provided in section \ref{sec:objects}.
+
+
+\subsection{Main program flow}
+
+The main program consists of a single call to the procedure \code{MAIN}. This does the following:
+
+\begin{description}
+
+\item[\code{Initialise}:]
+Most of the initialisation is done when defining the global variables, but some initialisation requires code execution: e.g. OS specific initialisation.
+
+\item[\code{Check_Arguments}:]
+Runs an initial check of the command line arguments passed to \TeXcount{}, e.g. for \code{-help}, and may exit \TeXcount{}.
+
+\item[\code{Parse_Arguments}:]
+Parses the command line arguments, setting option variables, and returns the list of \LaTeX{} files to be parsed.
+
+\item[\code{Apply_Options}:]
+This applies the options set either in the initial setup or initialisation, or when parsing the arguments. While most options are set directly during the argument parsing, settings that may depend on multiple options or options that should be applied only once, e.g. initialising the output and writing the HTML header, are applied here.
+
+\item[Parse files (or write help or error message):]
+The file parsing calls \code{Parse_file_list} with the list of files to be parsed, and this returns the total count object. Apart from this, help, summary output and error reports are produced if required.
+
+\item[\code{Close_Output}:] This just makes sure the output channel is properly closed, e.g. writing closing HTML code.
+
+\end{description}
+
+\CGI
+In the CGI version of \TeXcount{}, \code{Initialise}, \code{Check_Arguments} and \code{Parse_Arguments} are replaced by a single call to \code{Set_Options}. Also, since the CGI version only processes one file, alternatives for parsing and reporting on multiple files are not required and are instead replaced by a single call of \code{parse}.
+\CGIend
+
+
+\subsection{How \TeXcount{} processes \LaTeX{} documents}
+
+The \code{parse} routine is the entry point for parsing \LaTeX{} code of a single file. It takes a \Obj{TeXcode} object, the container object of a \LaTeX{} document and its corresponding \Obj{count} object, and performs the parsing of the entire document. The counts are stored in the counter in the \Obj{TeXcode} object.
+
+The hierarchy of delegation from \code{MAIN} down to \code{parse} is as follows:
+
+\begin{description}
+
+\item[\code{MAIN}] calls \code{Parse_file_list} with a list of files which returns the total count (a count object) for \code{MAIN} to report.
+
+\item[\code{Parse_file_list}] calls \code{parse_file} for each file in the provided file list, and for STDIN (identified by \code{\$_STDIN_}) if the option to parse standard input has been set. It then aggregates the counts returned by \code{parse_file} into a total count which it returns.
+
+\item[\code{parse_file}] calls \code{_add_file}, first for the main file, and then again for each included file if file inclusion (\code{-inc}) has been set. The aggregation of counts is done by \code{_add_file} into a total count object provided by \code{parse_file}, and this total count object is then returned by \code{parse_file} upon completing the parsing of the main file as well as all included files.
+
+\item[\code{_add_file}] reads the file into memory, creates a \Obj{TeXcode} object which encapsulates the \LaTeX{} code and the counts, and calls \code{parse} to perform the parsing of this \Obj{TeXcode} object. The counts are added directly into the \Obj{TeXcode} object, so only the \Obj{TeXcode} object reference is being passed around.
+
+\end{description}
+
+\CGI
+In the CGI version, \code{parse} is called directly from \code{MAIN} since only one document can be parsed and no file inclusion is possible. 
+\CGIend
+
+
+\subsection{\LaTeX{} code parsing by \code{parse}}
+
+The \code{parse} routine takes a \Obj{TeXcode} object and parses this to the end. It is, however, only the entry point for parsing the \LaTeX{} code: other routines do the main parsing with \code{_parse_unit} being the main work horse. In fact, \code{parse} only initiates the parsing, calling \code{_parse_unit} repeatedly until the end of the file.
+
+The \code{_parse_unit} routine is used to parse one unit or block of \LaTeX{} code: a unit/block can be a part of the document enclosed in e.g. \{\ldots\} or \code{\bs{begin}\ldots\bs{end}}, or based on context enclosed by e.g. \code{[\ldots]}, or the document at the top level. It is passed the \Obj{TeXcode} object to parse, a parsing state instructing it how the block should be parsed, and optionally a block-end token which tells \code{_parse_unit} when the block ends. The \code{_parse_unit} routine is then called recursively whenever a unit/block is encountered that requires a separate parsing state or closing token.
+
+The parsing state indicates if the block is part of the main text in which words should be counted, a header, equation contents, should be excluded, etc. and is the only state variable of the parser. In addition to the regular states with which the \LaTeX{} code is parsed, there are transition states. E.g. \code{\$STATE_TO_HEADER} indicates that the block should be counted as a header and the contents should then be parsed using \code{\$STATE_TEXT_HEADER} as specified in \code{\%transition2state}.
+
+The document is tokenized, and \code{_parse_unit} retrieves one token at a time by calling \code{next_token}. Depending on the active parsing state and token, different rules (most with their own subroutines) are applied. These rules add to the \Obj{count} object of the \Obj{TeXcode} object by calling \code{_inc_count} and set the presentation style of the verbose output, including which tokens to print. The active token and its style are by default stored in the \Obj{TeXcode} object and printed to the verbose output by \code{next_token} upon retrieving the next token, although this is occasionally overridden by calls to e.g. \code{flush_next}.
+
+When \code{_parse_unit} encounters a new block/unit, it will determine the state with which this unit should be parsed based on the present state and the context that defines the unit.
+
+
+\subsection{Summary statistics}
+
+The counts are stored in the \Obj{TeXcode} object: subroutines performing the actual parsing increment the appropriate counter upon processing the parsed tokens. The \Obj{TeXcode} object contains a main \Obj{count} object from which summary output is generated. The \Obj{count} object can also contain a list of subcounts, themselves \Obj{count} objects, which may also be presented in the summary.
+
+Depending on options set and the number of files parsed, summary output can range from a single number of the total word count, to an extensive summary for each specified file with separate summaries for each included file, as well as a total summary.
+
+
+
+\section{Global constants and variables}
+
+There are a number of global variables defined at the start of the script for storing options and settings as well as global counters.
+
+In addition, there are sets of global constants, as well as other globally defined variables, hashes and arrays. Here, we outline the main groups.
+
+
+\subsection{Global constants}
+
+There are a few sets of global constants. The use of global constants makes the code more readable. The sets of global constants are:
+
+\begin{description}
+
+\item[\code{\$STATE_\wild}:] Parsing states, e.g. \code{\$STATE_TEXT} for parsing \LaTeX{} code as regular text and \code{\$STATE_IGNORE} for regions that should not be counted.
+
+\item[\code{\$CNT_\wild}:] Index pointing to the location in the counter array used for a specific count, e.g. \code{\$CNT_WORDS_TEXT=1} indicating that words in text are counted in position 1 of the array.
+
+\item[\code{\$TOKEN_\wild}:] Token types, e.g. \code{\$TOKEN_WORD} and \code{\$TOKEN_MACRO}. When a token is parsed, the \Obj{TeXcode} object stores the token type as well as the token, which can then be used to determine how the token should be interpreted.
+
+\end{description}
+
+\subsubsection{Counter indices: \code{\$CNT_\wild}}
+
+The \Obj{count} object contains an array with the following counts: number of files, number of words in text, number of words in headers, number of words in captions, number of headers, number of floating objects/tables/figures, number of inline equations, number of displayed equations. Storing these is the main purpose of the \Obj{count} object.
+
+Each count has a fixed position in the array, and the \code{\$CNT_\wild} constants provide the positions of each count: e.g. \code{\$CNT_WORDS_TEXT=1} indicates that the counter for words in the text is stored in position 1 of the array. Originally, these positions were hard-coded and directly related to the parsing states, but by using these constants, and keeping the counter indices distinct from the parsing states, the code becomes both more readable and more flexible in case of future changes.
+
+\subsubsection{Parsing states: \code{\$STATE_\wild}}
+
+The parsing states fall into two categories.
+
+First there are parsing states used during the parsing of a unit/block: e.g. \code{\$STATE_TEXT}, \code{\$STATE_MATH}, \code{\$STATE_IGNORE}, \ldots. In some of the states, words are counted either as text words, header words or caption words; in other states, words are ignored and the state primarily influences how the parsed \LaTeX{} code is styled in the verbose output.
+
+Secondly, there are transitional states: e.g. \code{\$STATE_TO_HEADER} which indicates the start of a header which should first cause the header count to be incremented and then the contained text to be parsed as header text using the parsing state \code{\$STATE_TEXT_HEADER}. The handling of the transitional states is encoded in \code{\%transition2state} and performed by the \code{transition_to_content_state} routine which is called by \code{_parse_unit}.
+
+Macro handling rules specify how many parameters the macro takes and which parsing states are used to parse each parameter; for environments, it additionally specifies a parsing state for the contents of the environment.
+
+Originally, before implementing the \code{\$STATE_\wild} constants, fixed numerical values were hard coded into the Perl code, and these numerical codes were required for adding new rules. For the macro rules specified within the Perl code of \TeXcount{}, the original numerical codes remain in the initial rule specification. However, from version 2.3 of \TeXcount{}, the intention is that users should no longer use these numerical codes to specify new macro handling rules, but instead use a set of keywords: e.g. \code{text}, \code{header}, \code{ignore}, etc. For this purpose, a hash \code{\%key2state} is defined which maps keywords to parsing states. The original numerical codes are included in this map in part for backward compatibility, but also because this key-to-state map is applied to the macro handling rule hashes \code{\%TeXmacro}, \code{\%TeXenvir}, etc. The \code{\%key2state} hash is set up e.g. with
+\codeline{add_keys_to_hash(\bs{\%key2state},\$STATE_TEXT,1,'word','w','wd');}
+which maps the keys \code{1}, \code{'word'}, \code{'w'} and \code{'wd'} all to the value \code{\$STATE_TEXT} (which need not be 1!). However, this specification, which is used to convert keywords to states during initialisation of the macro handling rules and later if adding new rules, ensures that the original numerical codes will be handled as before: \TeXcount{} will be backward compatible with respect to using the numerical codes to add new macro handling rules through \code{\%TC} commands. 
+
+Although in theory the parsing state numerical codes could be changed without any effect to the code, there are still a few places where the actual numerical values are used: e.g. the routine \code{state_to_text}.
+
+\subsubsection{Token types: \code{\$TOKEN_\wild}}
+
+When the \LaTeX{} code is tokenized, i.e. the string containing the \LaTeX{} code is converted to tokens like words or macros, not only is the token stored in the \Obj{TeXcode} object, but a token type is stored as well indicating if the object is a word, macro, space, symbol, bracket, etc. To make the Perl code more readable, these token types, although just integer values, are represented by constants \code{\$TOKEN_\wild}.
+
+When \code{_parse_unit} parses the \LaTeX{} code, it frequently uses the token type stored in the \Obj{TeXcode} object rather than the token itself to determine how to interpret the parsed tokens.
+
+
+\subsection{Option alternatives}
+
+Some options result in choosing between a number of alternatives for parsing, counting or presentation. These alternatives tend to be defined in arrays or hashes. When an alternative is selected, the corresponding value(s) are copied to a variable, array or hash which may then later be applied or further processed.
+
+\begin{description}
+
+\item[\code{\%BreakPointOptions}:] For keywords like \code{section} or \code{chapter}, this defines which macros indicate a new break point (i.e. initiates a new subcount).
+
+\item[\code{\%STYLES}, \code{\%STYLE}:] The \code{\%STYLES} hash contains different sets of style definitions, used to define the style with which tokens are printed, and are used to set the \code{\%STYLE} hash by \code{Apply_Options} after the options have been processed. Each value of the \code{\%STYLES} is a hash mapping style name to ANSI colour styles. For a given style, only style names defined in the style are printed in the verbose output. If ANSI colour coded output is used, these are the colour codes; otherwise, the ANSI colour styles are not themselves used, but the style name must still be included in the hash to enable the token to be printed.
+
+\item[\code{\%NamedLetterPattern}, \code{\$LetterPattern}:] Named regex patterns are defined in \code{\%NamedLetterPattern} where the selected pattern is stored in \code{\$LetterPattern}. This regex pattern defines what is recognized as letters when parsing \LaTeX{} code.
+
+\item[\code{\%NamedWordPattern}, \code{@WordPatterns}, \code{\$WordPattern}:] Named word patterns are defined in \code{\%NamedWordPattern}. The selected patterns are stored in the array \code{@WordPatterns}. Letters are indicated by a special character, and when the options are applied replaced by \code{\$LetterPattern} and merged into a single regex stored in \code{\$WordPattern}.
+
+\item[\code{\%NamedMacroOptionPattern}, \code{\$MacroOptionPattern}:] Named regex patterns are stored in \code{\%NamedMacroOptionPattern}, and the selected pattern copied to \code{\$MacroOptionPattern}. This pattern is used to recognize macro options which should be excluded from word counts.
+
+\item[\code{\%NamedEncodingGuessOrder}:] For each named language, this gives an array of encodings to try if none is given.
+
+\end{description}
+
+
+
+\section{Details of the \TeXcount{} objects}\label{sec:objects}
+
+These objects are simply hashes that are created with a given set of keys. Some keys may, however, be optional.
+
+
+\subsection{The \Obj{Main} object}
+
+The \Obj{Main} object is used instead of the \Obj{TeXcode} object to capture errors and warnings. It is created by the \code{getMain} routine. The values (keys) it contains are:
+
+\begin{description}
+
+\item[\code{errorcount}:] Numerical value, initialised to 0, used to count the number of errors reported.
+
+\item[\code{errorbuffer}:] Array, initialised to an empty array, used to buffer error messages reported before output is available: e.g. before the header or HTML header has been printed.
+
+\item[\code{warnings}:] Hash, initially empty, used to store warnings.
+
+\end{description}
+
+When errors are reported through calls to \code{error}, they will be stored in the \code{errorbuffer} if this exists, otherwise printed immediately. This is used to store errors reported before e.g. the HTML header has been written. After the appropriate headers have been written and the output channel is ready for writing, a call to \code{flush_errorbuffer} is made which prints all the errors in the errorbuffer and then deletes it so further errors will be printed immediately rather than buffered.
+
+
+\subsection{The \Obj{TeXcode} object}
+
+The \Obj{TeXcode} object is used to encapsulate the \LaTeX{} code and corresponding counts. It is created by the \code{TeXcode} routine. The values it contains are:
+
+\begin{description}
+
+\item[\code{filename}, \code{filepath}:] The name and path of the parsed \LaTeX{} file.
+
+\item[\code{PATH}:] An array containing the paths to search for included documents. At creation, this is empty, but calls to \code{_add_file} will set it; the top level files, initiated from \code{parse_file}, will have this set to \code{\$workdir}.
+
+\item[\code{texcode}:] Initialised with the \LaTeX{} document as a single string. If included files are to be inserted into the document, they will be inserted into the \code{texcode} string.
+
+\item[\code{texlength}:] Counts the total length (in characters) of \LaTeX{} code. Initialised with the length of the \LaTeX{} document. If included documents are inserted, their length is added to \code{texlength}.
+
+\item[\code{line}:] Initialised to an empty string. During parsing, segment by segment (one paragraph at a time) is moved from \code{texcode} to \code{line}. Tokens are then read and subsequently removed from \code{line}.
+
+\item[\code{next}:] Initialised to \code{undef}, this stores the next token to be processed. Upon tokenization, the token is identified and removed from the start of \code{line} and moved to \code{next}.
+
+\item[\code{type}:] Initialised to \code{undef}, this contains the token type (\code{\$TOKEN_\wild}) of the \code{next} token.
+
+\item[\code{style}:] Initialised to \code{undef}, this is used to set the style with which the \code{next} token should be presented in the verbose output.
+
+\item[\code{printstate}:] Initialised to \code{undef}, this is used to output the active parsing state for use with verbose output (if \code{\$showstates} is set).
+
+\item[\code{eof}:] Initialised to 0, this is set to 1 once the end of the document is reached.
+
+\item[\code{countsum}:] This contains the main \Obj{count} object.
+
+\item[\code{subcount}:] This contains the present subcount which is also a \Obj{count} object. These subcounts are used to count e.g. sections and chapters of the document.
+
+\item[\code{errorcount}:] Initialised to 0, used to count the number of errors reported during the parsing.
+
+\item[\code{errorbuffer}:] Undefined at initiation, indicating that errors should be printed instantly rather than stored for later printing. Can be defined as an array which is then used to store error messages so they can be printed later. 
+
+\item[\code{warnings}:] Hash used to store warnings.
+
+\end{description}
+
+When the \Obj{TeXcode} object is initialised, the \LaTeX{} document is placed as a single big string in \code{texcode}. During parsing, \code{next_token} is called on to return the next token, which in turn delegates to \code{_get_next_token}. Instead of operating on the whole document, which was done in older versions of \TeXcount{} and was quite slow on large documents, \code{more_texcode} is called on to move segments (i.e. paragraphs) of \LaTeX{} code from \code{texcode} to \code{line}, and then it grabs one token at a time from \code{line}. This is when \code{next} and \code{type} are set.
+
+When the tokens are interpreted and counted, \code{inc_count} is called which increments the appropriate counter in \code{subcount}. If a new subcount is initiated, a call to \code{next_subcount} adds \code{subcount} to \code{countsum}, including appending the \code{subcount} object to the list of subcounts stored with \code{countsum}, and then replaces \code{subcount} with a new \Obj{count} object. 
+
+
+\subsection{The \Obj{count} object}
+
+The \Obj{count} object is used to store the word and text element counters. It is created by \code{new_count}. The values it contains are:
+
+\begin{description}
+
+\item[\code{title}:] A string set upon creating to contain a descriptive title of the count.
+
+\item[\code{counts}:] An array, initialized with 0s, which is used to store the counts. The size of the array is determined by \code{\$SIZE_CNT} and should reflect the number of \code{\$CNT_\wild} indices defined.
+
+\item[\code{subcounts}:] This is an array, initialised to an empty array, used to store the subcounts.
+
+\end{description}
+
+In addition to the default fields, when used as the \code{countsum} field of a \Obj{TeXcode} object, a few additional fields are added:
+
+\begin{description}
+
+\item[\code{TeXcode}:] This is a reference pointing back to the \Obj{TeXcode} object in which it is contained.
+
+\end{description}
+
+
+
+\section{\LaTeX{} code parsing and interpreting}
+
+The entry point for parsing a \LaTeX{} document is the \code{parse} routine. This simply calls \code{_parse_unit} repeatedly using parsing state \code{\$STATE_TEXT} until the end of the document is reached. Thus, \code{_parse_unit} is the main routine for performing the actual parsing.
+
+The \code{_parse_unit} routine is called with a \Obj{TeXcode} object, a parsing state, and optionally a unit-ending token as arguments. It then calls \code{next_token} on the \Obj{TeXcode} object until the unit-ending token is reached: if the file ends before this is found, an error is reported. If no unit-ending token is provided, only one unit will be parsed. If the unit-ending token is set to \code{\$_PARAM_}, indicating that the unit to be parsed is a macro parameter, the \code{\$simple_token} flag is set and passed to \code{next_token} to avoid combining letters into words, and only one token is parsed before returning.
+
+For each token, depending on the token, token type, and active parsing state, \code{_parse_unit} decides how the token should be interpreted. In some cases, the interpretation is done within \code{_parse_unit}, but in many cases the interpretation is delegated to subroutines like \code{_parse_macro}, \code{_parse_math}, etc. If new groups (\code{\{\ldots\}} or \code{\bs{begin}\ldots\bs{end}}) are encountered, this causes \code{_parse_unit} to be called recursively with a unit-ending token passed to \code{_parse_unit} to identify the group end.
+  
+Note that by default, even blocks that are to be ignored are parsed and require balanced units. Different exclude states exist to deal with cases in which the unit should not be completely parsed.
+
+Upon interpreting the parsed tokens, \code{_parse_unit} or the subroutines to which it delegates the interpretation control the counter incrementation as well as how the tokens are presented in the verbose output. The counter incrementation is done through calls to \code{inc_count} passing as arguments the \Obj{TeXcode} object, the appropriate count reference (\code{\$CNT_\wild}), and optionally a number if the counter should be increased by a number different from 1. Specifying how the token should be presented in the verbose output is done by deciding on the style, usually set using \code{set_style}: the styles are represented by strings that give the style name, which are the same as used as keys in \code{\%STYLE} and as styles in the HTML output.
+
+If a style for presenting a token is selected which is not in the \code{\%STYLE} hash, the token is not printed. Thus, the \code{\%STYLE} hash also filters which tokens are printed to the verbose output.
+
+
+\subsection{Tokenization and token handling}
+
+The routine for retrieving the next token is \code{next_token}. This first makes sure that the previous token gets printed to the verbose output with the style specified by \code{set_style}. It then calls \code{_get_next_token} to retrieve the next token: this will process comments and line breaks itself until a token is retrieved that it returns.
+
+The \code{_get_next_token} routine checks the \code{line} field of the \Obj{TeXcode} object to determine which is the next token in \code{line}. If the \code{line} field is empty, it calls \code{more_texcode} to move the next segment of \LaTeX{} code from the \code{texcode} field of the \Obj{TeXcode} object to \code{line}. When it has decided on the appropriate kind of token, removing it from the start of the \code{line} field in the process, it sets the \code{next} and \code{type} fields of the \Obj{TeXcode} object through calls to \code{__set_token} or \code{__get_token} (for single character tokens).
+
+If the optional \code{\$simple_token} flag is set, only simple tokens will be returned: i.e. letters will not be combined into words. This is used for parsing macro parameters.
+
+
+\subsection{Processing parameters and options}
+
+In \code{_parse_unit}, based on the parsing state and parsed token, it is decided how to interpret and process the token. In some cases, this processing is restricted to the parsed token itself: counting or ignoring it as well as deciding on the style with which it should be presented in the verbose output.
+
+In some cases, the token influences the parsing of subsequent text: e.g. macros can take parameters and options. Special subroutines exist to handle parsing of macro parameters, gobble up spaces or macro parameters, or handle ignored regions.
+
+
+\subsection{Verbose output}
+
+By default, all parsed code is processed for printing to the verbose output. If it actually gets printed or not depends on whether the set style is included in the \code{\%STYLE} hash or not. 
+
+Upon parsing a token, it is stored in the \code{next} field of the \Obj{TeXcode} object. If \code{set_style} is called during processing, this will set the \code{style} field of the \Obj{TeXcode} object, but will not itself print the token. The \code{flush_next} routine is used to print the \code{next} token using the style set in the \code{style} field, or provided in the call; this in turn calls \code{print_style} which is responsible for the printing. There is an automatic call to \code{flush_next} when the next token is retrieved, ensuring that all tokens are sent off for printing. When \code{flush_next} is called, the \code{style} field is set to \code{\$STYLE_BLOCK='-'} which blocks further printing (or change in style) of the token; the \code{style} field is then set to \code{undef} by \code{next_token} upon reading the next token.
+
+The tokens are passed to \code{print_style}, either directly from the parsing or via \code{next_token}, which looks up the style in the \code{\%STYLE} hash. Only tokens whose style is defined in the \code{\%STYLE} hash get printed. If colour coded output to text is set, the values in \code{\%STYLE} are used with the \code{ansiprint} function to print the token using ANSI colour codes. If output to HTML is chosen, the token will be printed enclosed in a \code{<span>} tag using the style as class; the HTML style definitions are then used to determine how these elements will be displayed.
+
+Special style values are \code{\$STYLE_EMPTY=' '} which is used for spaces and must be defined in the \code{\%STYLE} for spaces to be printed, and the \code{\$STYLE_BLOCK='-'} style value which is not actually a style but a value used to mark that the token has already been printed and block further printing of it.
+
+In addition to the \code{\%STYLE} hash which specifies which tokens get printed, there is a global variable \code{\$printlevel} the value of which is taken from the \code{\%STYLE} which is used to control if verbose output is on ($1$ or $2$) or off ($0$ or $-1$). The $-1$ value indicates the quiet mode in which errors should not be printed; the value $1$, as opposed to $2$, indicates that multiple ignored lines should be collapsed to make the verbose output more compact, although this is only partially done.
+
+The routines for handling tokens, styles and verbose printing remain from the earliest version of \TeXcount{}, have not undergone much improvement or cleaning up, and remain somewhat unstructured. Hence, there may be stray calls to e.g. \code{set_style} that no longer have any effect.
+
+
+
+\section{Regex patterns: letters, words, macro options}
+
+One of the most important regex definitions in \TeXcount{} is that used to recognize words. This is done in two steps: first a regex for letters is produced, and then this is combined with patterns for words to generate one big pattern.
+
+Another regex defined is the one used to recognize macro options, i.e. \code{[\ldots]}, that appear together with macros and which should be ignored.
+
+One reason behind the desire to generate one big pattern rather than loop through alternative patterns is to enable Perl to compile each pattern just once. The pattern compilation typically takes longer than the pattern matching, so this can make a big difference.
+
+
+\subsection{The word regex}
+
+First note that \TeXcount{} distinguishes between alphabetic words, i.e. words composed of letters, and logograms (e.g. Chinese characters) which are counted per character. When words (or letters) are counted, these are made from characters defined as alphabetic; characters defined as logographic are counted separately character by character.
+
+The regex pattern recognizing a letter is placed in \code{\$LetterPattern}. This is usually taken from one of the optional patterns in \code{\%NamedLetterPattern}, but can be modified elsewhere or replaced by \code{undef} to signify that no words or letters should be counted.
+
+A number of regex patterns which should be recognized as words are placed in the array \code{\@WordPatterns}. This is usually set by using one of the named lists of word patterns defined in \code{\%NamedWordPattern}, but can be redefined or modified by options. In the word patterns, the character \code{\@} is used to represent a letter, and this is later replaced by \code{\$LetterPattern} when the options are applied.
+
+After parsing the command line arguments, the options and settings are applied. At this point, through \code{apply_language_options}, \code{\$LetterPattern} is applied to \code{\@WordPatterns}, which are then combined into a single regex: \code{\$WordPattern}. At this point, patterns for recognizing logograms are also added.
+
+
+\subsection{The macro option regex}
+
+After macros and macro parameters, macro options on the form \code{[\ldots]} will be ignored. There is a single regex used to recognize and remove these macro options.
+
+For most uses, macro options tend to be short codes which are easily recognized. However, there are also cases where the macro options can be more complex. On the other hand, there are also cases where brackets are used without being macro options, and it is vital that these cases should not be mistaken for macro options: in particular if they contain text that should be counted.
+
+In order to capture most macro options as options without running a risk of ignoring actual text enclosed in brackets, restrictions are placed on what can go inside macro options. The default rule is moderately strict, but can be relaxed to allow more extensive and general macro options.
+
+The different macro option regex patterns are named in \code{\%NamedMacroOptionPattern} and copied to \code{\$MacroOptionPattern} when initialised or changed by options.
+
+
+\subsection{Unicode character classes}
+
+The user can specify which character classes should be considered alphabetic (i.e. letters) and which should be considered logographic (i.e. counted as individual characters). Typical alphabetic characters are the Latin letters. Typical logograms are the Chinese characters. If any of the language options are used, these character classes will automatically be set.
+
+Specifications of alphabets and logograms are done by options \code{-alpha=} and \code{-logo=} using Unicode character classes. Unicode classes include Latin, Digit, Ideographic, Han, etc. Note that all Unicode character classes start with capital letters. 
+
+
+\subsection{Custom made character classes}
+
+Some of the Unicode character classes are not defined quite as desired by \TeXcount{}. In particular, the \code{Alphabetic} character class includes \code{Ideographic}, which would cause e.g. Chinese characters to be allowed as parts of words together with Latin characters rather than force them to be counted as individual characters. To resolve this problem, new character classes are defined in \TeXcount{} that fit our need.
+
+New character classes can be defined within \TeXcount{} through subroutines named \code{Is_\textit{name}}. Most notable is the \code{Is_alphabetic} character class from which the logographic characters have been excluded. This is now used as the default alphabetic character class.
+
+Presently defined character classes are named \code{digit}, \code{alphabetic}, \code{alphanumeric}, \code{punctuation}, \code{cjk}, \code{cjkpunctuation}. Note that these are all lower case, and have the prefix \code{Is_} added when referred to in the code.
+
+When adding character classes to the set of alphabetic or logographic characters using \code{-alpha=} or \code{-logo=}, the names without the prefix \code{Is_} may be used: for character classes starting with a lower case letter, the prefix is added automatically.
+
+Note that the subroutines specifying the character classes must be defined prior in the code to any use: this is unlike other subroutines which may be defined anywhere in the code. Also, to be permitted as character classes by Perl, the subroutines must start with \code{Is_} (or \code{In_} although that is not used by \TeXcount{}), although different versions of Perl need not enforce this.
+
+
+
+\section{Macro handling rules}
+
+While some rules for handling macros are hard-coded into \TeXcount{}, most of the rules are stored in a number of hashes which \TeXcount{} looks up whenever a macro is encountered. The general rule is that the keys are either macros (e.g. \code{'\bs{section}'}) or environment names (e.g. \code{'quote'}).
+
+\begin{description}
+
+\item[\code{\%TeXmacro}:] The keys are macros, or \code{'begin\textit{name}'} where name is an environment name, and the values specify how many parameters the macro (or environment) takes and how these should be processed. See the section on parameter handling rules further down.
+
+\item[\code{\%TeXenvir}:] The keys are environment names, and values are the parsing state with which the contents of the environment should be parsed.
+
+\item[\code{\%TeXpreamble}:] These are macro handling rules to be applied in the preamble, i.e. after \code{\bs{documentclass}} but before \code{\bs{begin}\{document\}}. The rules are specified as for \code{\%TeXmacro}.
+
+\item[\code{\%TeXfloatinc}:] These are macro handling rules to be applied within floating bodies, i.e. tables and figures.
+
+\item[\code{\%TeXmacroword}:] The keys are macros, and the values are numbers representing how many words the macro generates. This is used for macros like \code{\bs{LaTeX}} which generate text.
+
+\item[\code{\%TeXpackageinc}:] The keys are macros used to include packages. Although included in \code{\%TeXmacro}, the processing of package inclusion is actually performed by \code{_parse_include_package} independent of the hash value. The value should therefore be \code{1} or \code{[\$STATE_IGNORE]} since this is how it will be processed by \code{_parse_include_package}.
+
+\item[\code{\%TeXfileinclude}:] The keys are macros used to include \LaTeX{} files into the document, the value a keyword or list of keywords telling how file names and paths should be interpreted. Processing of these macros is done by \code{_parse_include_file}.
+
+\end{description}
+
+Note that the definition of \code{\%TeXmacro} starts by including \code{\%TeXpreamble}, \code{\%TeXfloatinc} and \code{\%TeXpackageinc}. After that, the values of \code{\%TeXpackageinc} are never used. For \code{\%TeXpreamble} and \code{\%TeXfloatinc}, however, it is in principle possible to have rules within the preamble and floats, respectively, that are different from those defined in \code{\%TeXmacro} and applied elsewhere in the document.
+
+
+\subsection{Parameter handling rules}
+
+A macro can be specified to take a given number of parameters: this will typically be \code{\{\ldots\}} blocks following the macro. For each of these parameters, a separate parsing state can be specified. This is represented by an array with one element for each parameter, the elements being the parsing state (\code{\$STATE_\wild}) with which that parameter should be parsed.
+
+In addition to the \code{\$STATE_\wild} rules, there are some modifier/option states, \code{\$_STATE_\wild}. The \code{\$_STATE_OPTION} state indicates that the next rule in the list is an optional parameter enclosed in \code{[]}. By default \code{[]} options are ignored, which can be switched off by \code{\$_STATE_NOOPTION} or on by \code{\$_STATE_AUTOOPTION}.
+
+An alternative specification of a parameter handling rule is to give the number of parameters to ignore. \TeXcount{} will check if the specified rule is an array (as described above) or a number and interpret the rule accordingly.
+
+The hashes \code{\%TeXmacro}, \code{\%TeXpreamble} and \code{\%TeXfloatinc} all take values that are this kind of parameter handling rules, as does \code{\%TeXpackageinc} since its entries are included in \code{\%TeXmacro}.
+
+Throughout the script, parsing states are referred to using the \code{\$STATE_\wild} constants. In previous versions, however, these codes were hard-coded into the script and used both to set up the hashes and to specify new rules through \%TC instructions. For backward compatibility, the old numerical state codes remain in the conversions from keywords to \code{\$STATE_\wild} constants as stored in \code{\%key2state} and applied through calls to \code{convert_hash} accompanied by \code{keyarray_to_state} or \code{key_to_state}.
+
+
+\subsection{File inclusion and the \code{\%TeXfileinclude} hash}
+
+The main \LaTeX{} commands for file inclusion are \code{\bs{input}} and \code{\bs{include}}, while \code{\bs{bibliography}} includes the \code{.bbl} bibliography file. However, additional packages exist that can also modify the file search path, of which \TeXcount{} has support for the \code{import} package.
+
+File inclusion macro rules are stored in the \code{\%TeXfileinclude} hash. The values are strings which contain one or more keywords (separated by space or comma):
+%
+\begin{description}
+
+\item[\code{input}:] This is a special keyword to use with \code{\bs{input}}. The handling of the parameter values is as \code{file}, but the parameter itself is not required to be enclosed in \code{\{\}}.
+
+\item[\code{file}:] This parameter simply gives the name of or path to a file. If the file is not found, \TeXcount{} will append \code{.tex} and try again.
+
+\item[\code{texfile}:] This parameter gives the name of or path to a file, but \code{.tex} will be appended, and is the rule used by \code{\bs{include}}.
+
+\item[\code{dir}:] This parameter provides the path of a directory relative to the \code{\$workdir}, and adds this to the search path before including any files. This is used with the \code{\bs{import}} macro of the \code{import} package.
+
+\item[\code{subdir}:] This parameter provides the path of a directory relative to the current directory, and adds this to the search path before including any files. This is used with the \code{\bs{subimport}} macro of the \code{import} package.
+
+\item[\code{<bbl>}:] This is a special keyword to use with \code{\bs{bibliography}} to specify inclusion of the bibliography file.
+
+\end{description}
+
+The parsing of the macros and parameters is done by \code{_parse_include_file}. For each keyword it parses a parameter, unless the parameter is on the form \code{<\textit{keyword}>}. The parsing of the \code{input} parameter is handled differently from the rest since it need not be enclosed by \code{\{\}}. It then delegates the processing of macro inclusion rules to \code{include_file}.
+
+In \code{include_file}, the file is located (based on the search path) and is either appended to the \code{@filelist} array of files to be included, or merged immediately into the document by calls to \code{read_binary} and \code{prepend_code}.
+
+The \code{@filelist} array contains elements which are themselves arrays on the form \code{[file,path,\ldots]} where the first element is the path to the file to be included, and the remaining elements are the search paths used to set the \code{PATH} values of the \Obj{TeXcode} object. For the top level files, i.e. the ones specified on the command line, the search path will contain only \code{\$workdir}: the directory from which \TeXcount{} is executed unless \code{-dir} is used to specify otherwise. If more directories are added to the path, \code{\$workdir} will remain the last directory of the search path, while the first directory of the search path will be considered the current directory.
+
+File inclusion macros can also take parameters that should be parsed using regular macro parsing rules. The \code{TeXmacro} hash will be checked for \code{@pre\bs{macroname}} and \code{@post\bs{macroname}} entries which will be applied before and after the file handling rules.
+
+
+\subsection{Package and document class specific rules}
+
+Whenever \TeXcount{} encounters a package inclusion, it will check for package specific rules. These are defined in hashes named \code{\%PackageTeXmacro} etc. which map the package name to the hash map of rules to be added to \code{\%TeXmacro} etc. There is an additional \code{\%PackageSubpackage} which for each package name in the set of keys maps to a list of packages whose rules should automatically be included.
+
+Similarly, rules specific to particular document classes may be implemented by using the key \code{class\%\parm{name}} instead of the package name, and these will then be added to the set of parsing rules if \code{\bs{documentclass}\{\parm{name}\}} is encountered.
+
+Note that rules for including the bibliography are also stored in these hashes under the key \code{\%incbib}.
+
+
+
+\section{Presentation of summary statistics}
+
+The counts (words, headers, etc.) from a \LaTeX{} document are stored as \Obj{count} objects. The main routine for printing the summary statistics from a \Obj{count} object is \code{print_count}: the routine \code{conditional_print_total} which is called from \code{MAIN} delegates printing to \code{print_count} except if the brief output format is selected. The \code{print_count} routine then delegates the printing to one of a number of subroutines depending on the settings.
+
+Word frequencies are stored globally in \code{\%WordFreq}. This gets incremented each time \code{_process_word} is called. Summaries of word frequencies are produced and printed by \code{print_word_freq} which tries to combine words that differ only by capitalization, and also produces subcounts per character class.
+
+A global count of the number of errors reported is stored in \code{\$errorcount}, while warnings are stored globally in the \code{\%warnings} hash mapping when added through the \code{warning} routine with the warning as key and the number of occurrences as value to ensure each warning is only listed once no matter how many times it is reported. Both warnings and errors are also stored in their respective \Obj{Main} or \Obj{TeXcode} objects when reported through calls to \code{error} or \code{warning}.
+
+In \code{MAIN}, after processing of the \LaTeX{} documents, \code{Report_Errors} is called to give a total report on errors and warnings. The exact output depends on the settings. 
+
+NB: Processing of errors and warnings requires some improvement. Now, parts of the code handle errors per file, others do so globally.
+
+
+
+\section{Encodings}
+
+The preferred encoding is Unicode UTF-8. From version 2.3 of \TeXcount{}, this is used internally to represent the \LaTeX{} code, and Unicode is relied upon to handle different character sets and classes.
+
+When files are read into \TeXcount{}, they may have to be decoded from whatever encoding they are saved in into UTF-8. The file encoding may be specified explicitly using the \code{-enc=} option, otherwise \TeXcount{} will try to guess the appropriate encoding.
+
+The output from \TeXcount{} is by default UTF-8. However, if a file encoding is specified using \code{-enc=} and output is text, not HTML, this encoding will also be applied to the output. This may be useful when using \TeXcount{} in a pipe, otherwise the documents will be converted to UTF-8.
+
+
+
+\section{Help routines and text data}
+
+A hash, \code{\%GLOBALDATA}, and a hash reference, \code{\$STRINGDATA}, are defined for storing strings used for various outputs. The \code{\%GLOBALDATA} is set up containing string constants for version number, maintainer name, etc., while \code{\$STRINGDATA} is initially undefined.
+
+The \code{\$STRINGDATA} hash is accessed through calls to \code{StringData} which initialises the hash if undefined. Initialisation, which is done by \code{STRINGDATA}, reads through the \code{__DATA__} section at the end of the script, identifies headers which are used as keys in the hash which maps to an array containing the subsequent text lines. References in the read text on the form \code{\$\{keyword\}} are replaced by the corresponding string in \code{\%GLOBALDATA}: this allows e.g. version information to be inserted into the text.
+
+Headers in the text data consist of three or more colons followed by space(s) and a keyword. Lines containing three or more colons but no keyword have no effect.
+
+Lines starting with \code{\@} are used to format output printed by \code{wprintlines}. The two characters \code{'-'} and \code{':'} can then be used to indicate indentation tabulators, and subsequent lines will be indented and wrapped: this is used for printing help on command line options. The \code{wprintlines} also wraps text: the page width is set by \code{\$Text::Wrap::columns}.
+
+
+\end{document}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/TechDoc.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/macros.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/macros.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/macros.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,90 @@
+%% LaTeX macros
+\usepackage{times}
+%\usepackage{garamond}
+%\usepackage{fourier} % math & rm
+\usepackage[scaled=1.05]{helvet} % ss
+\usepackage{relsize}
+\usepackage[dvipsnames]{xcolor}
+
+% Define underscore to be a regular character in text mode
+\begingroup
+  \catcode`\_=\active
+  \gdef_#1{\ensuremath{\sb{#1}}}
+\endgroup
+\mathcode`\_=\string"8000
+\catcode`\_=12
+
+% Set version number
+\newcommand\version{$3.1$}
+
+\newcommand\copyrightfootnote{
+\footnotetext{Copyright (2008-2017) of Einar Andreas R{\o}dland, distributed
+under the \LaTeX{} Project Public License (LPPL).}
+}
+
+%TC:macroword \TeXcount 1
+\newcommand\TeXcount{{\TeX}count}
+
+% Text formats
+\newcommand\codestyle[1]{\textsf{\color{Blue}#1}}
+\newcommand\code[1]{{\smaller\codestyle{#1}}}
+\newcommand\bigcode[1]{\codestyle{#1}}
+\newcommand\codeline[1]{\begin{quote}\code{#1}\end{quote}}
+\newcommand\bs[1]{\textbackslash#1}
+\newcommand\URL[1]{\textsf{\small #1}}
+
+% Description items: options, parameters, optional parameters
+% These are also interpreted by dos2html.pl
+\def\option[#1]{\item[\code{#1}]\hskip 0pt plus 10pt}
+\def\parm#1{\textit{\color{OliveGreen}#1}}
+\def\opt#1{\parm{[#1]}}
+\def\alt#1{[#1]}
+\def\optiontext#1{\textrm{\bfseries\color{black}#1}}
+
+% Mark off notification in contents for good visibility
+\newcommand\ContentsNote[1]{\addtocontents{toc}{\string\marginpar{\textsf{\color{red}\tiny #1}}}}
+
+% Notabene: margin note
+%TC:macro NB 1
+\newcommand\NB[1]{\ContentsNote{NB}\marginpar{\textsf{\tiny#1}}}
+
+% Mark text as a notification
+%TC:macro NOTE [text]
+\newcommand\NOTE[1]{\textit{\color{red}#1}}
+
+% Mark text for update
+%TC:macro UPDATE [text]
+\newcommand\UPDATE[1]{\ContentsNote{UPDATE}\textit{\textbf{\color{red}This needs to be updated:} {\color{blue}#1}}}
+
+% Mark text as a notification
+%TC:macro TODO [text]
+\newcommand\TODO[1]{\ContentsNote{TODO}\textit{\textbf{\color{red}#1}}}
+
+% BUG
+%TC:macro BUG [text]
+\newcommand\BUG[1]{{\color{red}#1}}
+
+\makeatletter
+
+\renewcommand\@maketitle{%
+\newpage\null\vskip 2em%
+\begin{center}%
+\let\footnote\thanks
+{\LARGE \@title \par}%
+\end{center}%
+\par
+\vskip 1.5em
+}
+
+\renewcommand\abstractname{Abstract}
+\renewenvironment{abstract}{%
+  \begin{center}%
+    {\slshape\bfseries\large\abstractname\vspace{-.5em}\vspace{\z@}}%
+  \end{center}%
+  \vskip 4pt
+  \slshape
+}{
+\vskip 0.5em
+}
+
+\makeatother


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/macros.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_addrules.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/sub_addrules.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/sub_addrules.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,33 @@
+% Subsection: TC-commands for adding macro rules
+
+\begin{description}
+%PATTERN: \option[]
+%OUTPUT:  <dt>#1<dt>
+%OUTPUT:  <dd>#x<dd>
+%OUTPUT:
+\sloppy
+
+\option[macro \parm{macroname} \parm{parameter-rules}]
+Defines macro handling rule for the specified macro. The parameter is on the form \code{[\textit{rule},\ldots]} where each rule is either a keyword indicating the parsing rule for a macro parameter or \code{option:\textit{rule}} for an optional \code{[]}-enclosed parameter. Alternatively, an integer value $n$ indicates that the $n$ first parameters to the macro should be ignored, equivalent to giving a list of $n$ \code{ignore} rules.
+
+\option[envir \parm{envirname} \parm{parameter-rules} \parm{content-rule}]
+(The previously used command, \code{group}, remains an alias for \code{envir}, but the name \code{envir} is more appropriate and therefore recommended.)
+This specifies the handling of environments with the given name. The parameter handling rule, applied to parameters following \code{\bs{begin}\{\textit{name}\}}, are specified as in the \code{macro} instruction. The second parameter specifies the rule, i.e. parser state, with which the contents should be parsed.
+
+\option[macrocount \parm{macroname} \opt{count-spec.}]
+(An alias for \code{macrocount} is \code{macroword}; the preferred name was changed to reflect that this can count any element, not just words.)
+If a number is provided as the count parameter, this defines the given macro to be counted as the specified number of words; if no count is specified, it is assumed to be 1. Alternatively, a \code{[]}-enclosed list of counters can be specified (using the counter keywords), causing each of them to be incremented: counters are \code{word}/\code{text}, \code{headerword}, \code{otherword}, \code{header}, \code{float}, \code{inlinemath}, \code{displaymath} plus a number of aliases.
+
+\option[breakmacro \parm{macroname}]
+Specify that the given macro should cause a break point.
+
+\option[floatinclude \parm{macroname} \parm{parameter-rules}]
+Specify macro handling rules used within float groups. The handling rules are specified as for \code{macro}. Most commonly, the parameter rule will be the \code{otherword}/\code{oword} to specify that words should be counted as \textit{other words}.
+
+\option[preambleinclude \parm{macroname} \parm{parameter-rules}]
+Specifies macro handling rules to be used in the preamble: the text between \code{\bs{documentclass}} and \code{\bs{begin}\{document\}}. The rule is specified like the \code{macro} rules.
+
+\option[fileinclude \parm{macroname} \parm{file-path-spec.}]
+Specifies macros that cause files to be included when \TeXcount{} is run with the \code{-inc} option. The parameters specify the format on which the file path is specified, and can also be used to modify the search path used within the included document.
+
+\end{description}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_addrules.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_options.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/sub_options.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/sub_options.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,155 @@
+% Subsection: command line options
+
+\begin{description}
+%PATTERN: \option[]
+%OUTPUT:  <dt>#1<dt>
+%OUTPUT:  <dd>#x<dd>
+%OUTPUT:
+\def\option[#1]{\item[\quad\code{#1}]\hskip 0pt plus 10pt}
+\def\alt#1{[#1]}
+\def\alts#1{[[#1]]}
+\sloppy
+
+\option[-v]Verbose (same as \code{-v3}).
+
+\option[-v0]No details (default).
+
+\option[-v1]Prints counted text, marks formulae.
+
+\option[-v2]Also prints ignored text.
+
+\option[-v3]Also includes comments and options.
+
+\option[-v4]Same as \code{-v3 -showstate}.
+
+\option[-v=\ldots, -v\alts{0-4}\ldots]Allows detailed control of which elements are included in the verbose output. The provided value is a list of styles or style categories separated by \code{+} or \code{-} to indicate if they should be added or removed from the list of included styles. Style categories start with capital letter and include \code{Words}, \code{Macros}, \code{Options}; the individual styles are in all lower case and include \code{word}, \code{hword}, \code{option}, \code{ignore}.
+
+\option[-showstate]Show internal states (with verbose).
+
+\option[-brief]Only prints a one line summary of the counts for each file.
+
+\option[-q, -quiet]Quiet mode, does not print error messages. Use is discouraged, but it may be useful when piping the output into another application.
+
+\option[-strict]Prints a warning of begin-end groups for which no specific rule is defined.
+
+\option[-total]Only give total sum, no per file sums.
+
+\option[-1]Same as specifying \code{-brief} and \code{-total}, and ensures there will only be one line of output. If used with \code{-sum}, the output will only be the total number.
+
+\option[-0]Same as \code{-1}, i.e. \code{-brief} and \code{-total}, but does not put a line shift at the end. This may be useful when the one line output is to be used by another application, e.g. Emacs, for which the line shift would otherwise need to be stripped away.
+
+\option[-template="\ldots"]Specify an output template which is used to generate the summary output for each file and for the total count. Codes \code{\{\textit{label}\}} are used to include values, where \code{\textit{label}} is one of \code{0} to \code{7} (for the counts), \code{SUM}, \code{ERROR} or \code{TITLE} (first character of label is sufficient). Conditional inclusion is done using \code{\{\textit{label}?\textit{text}?\textit{label}\}} or \code{\{\textit{label}?\textit{if non-zero}|\textit{if zero}?\textit{label}\}}. If the count contains at least two subcounts, use \code{\{SUB?\textit{template}?SUB\}} with a separate template for the subcounts, or \code{\{SUB?\textit{prefix}|\textit{template}|\textit{suffix}?SUB\}}.
+
+\option[-sub\alt{=\ldots}, -subcount\alt{=\ldots}]Generate subcounts. Valid option values are \code{none}, \code{part}, \code{chapter}, \code{section} and \code{subsection} (default), indicating at which level subcounts are generated. (On by default.)
+
+\option[-nosub]Do not generate subcounts.
+
+\option[-sum\alt{=n,n,\ldots}]Produces total sum, default being all words and formulae, but customisable to any weighted sum of the seven counts (list of weights for text words, header words, caption words, headers, floats, inlined formulae, displayed formulae).
+
+\option[-nosum]Do not generate total sum. (Default choice.)
+
+\option[-col]Use ANSI colour codes in verbose output. This requires ANSI colours which is used on Linux, but may not be available under Windows. On by default on non-Windows systems. 
+
+\option[-nc, -nocol]No colours (colours require ANSI). Default under Windows.
+
+\option[-nosep, -noseparator]No separating character/string added after each word in the verbose output (default).
+
+\option[-sep=, -separator=]Separating character or string to be added after each word in the verbose output.
+
+\option[-relaxed]Relaxes the rules for matching words and macro options.
+
+\option[-restricted]Restricts the rules for matching words and macro options.
+
+\option[-]Read \LaTeX{} code from STDIN.
+
+\option[-inc]Parse included files (as separate files).
+
+\option[-merge]Merge included files into document (in place).
+
+\option[-noinc]Do not parse or merge in included files (default).
+
+\option[-incbib]Include bibliography in count, include bbl file if needed.
+
+\option[-nobib]Do not include bibliography in count (default).
+
+\option[-incpackage=]Include rules for a given package.
+
+\option[-dir\alt{=\ldots}]Specify working directory which will serve as root for all include files. The default (\code{-dir=.}) is to use the current directory, i.e. from which \TeXcount{} is executed: the path can be absolute or relative to the current directory. Use \code{-dir} to use the path of the main \LaTeX{} document as working directory.
+
+\option[-auxdir\alt{=\ldots}]Specify the directory of the auxiliary files, e.g. the bibliography (\code{.bbl}) file. The default setting (\code{-auxdir} only) indicates that auxiliary files are in the working directory (from the \code{-dir} or \code{-dir=} option). If \code{-auxdir=} is used to provide a path and \code{-dir=} is used to specify the working directory, the path to the auxiliary directory is taken to be relative to the current folder (from which \TeXcount{} is executed); if used with \code{-dir}, the working directory is taken from the path of the parsed file, and the auxiliary directory is taken to be relative to this (unless an absolute path is provided).
+
+\option[-enc=, -encoding=]Specify encoding to use in input (and text output).
+
+\option[-utf8, -unicode]Use UTF-8 (Unicode) encoding. Same as \code{-encoding=utf8}.
+
+\option[-alpha=, -alphabets=]List of Unicode character groups (or digit, alphabetic) permitted as letters. Names are separated by \code{,} or \code{+}. If list starts with \code{+}, the alphabets will be added to those already included. The default is Digit+alphabetic.
+
+\option[-logo=, -logograms=]List of Unicode character groups interpreted as whole word characters, e.g. Han for Chinese characters. Names are separated by \code{,} or \code{+}. If list starts with \code{+}, the alphabets will be added to those already included. By default, this is set to include Ideographic, Katakana, Hiragana, Thai and Lao.
+
+\option[-ch, -chinese, -zhongwen]Turn on Chinese mode in which Chinese characters are counted. I recommend using UTF-8, although \TeXcount{} will also test other encodings (GB2312, Big5, Hz) if UTF-8 fails, and other encodings may be specified by \code{-encoding=}.
+
+\option[-jp, -japanese]Turn on Japanese mode in which Japanese characters (Kanji and Kana) are counted. I recommend using UTF-8, although \TeXcount{} will also test other encodings (e.g. EUC-JP) if UTF-8 fails, and other encodings may be specified by \code{-encoding=}.
+
+\option[-kr, -korean]Turn on Korean mode in which Korean characters (Hangul and Han) are counted. I recommend using UTF-8, although \TeXcount{} will also test other encodings (e.g. EUC-KR) if UTF-8 fails, and other encodings may be specified by \code{-encoding=}.
+
+\option[-kr-words, -korean-words]Korean mode in which Hangul words are counted (i.e. as words separated by spaces) rather than characters. Han characters are still counted as characters. See also \code{-korean}.
+
+\option[-chinese-only, ..., -korean-words-only]As options \code{-chinese}, ..., \code{-korean-words}, but also excludes other alphabets (e.g. letter-based words) and logographic characters.
+
+\option[-count-all, -all]Allow all alphabets, digits, and logograms as letters, whether counting words or characters. This is the default setting.
+
+\option[-char, -letter]Count letters instead of words. This count does not include spaces.
+
+\option[-char-only, -letter-only]Count letters instead of words, but excludes logograms (like Chinese characters). This count does not include spaces.
+
+\option[-all-nonspace-char, -all-nonspace-characters]Counts characters instead of words, including letters, logograms, and punctuation, but not including spaces.
+
+\option[-out=]Send output to file. Takes file name as value.
+
+\option[-out-stderr]Send output to STDERR instead of STDOUT.
+
+\option[-html]Output in HTML format.
+
+\option[-htmlcore]Only HTML body contents.
+
+\option[-htmlfile=]File containing a template HTML document with \code{<!-- TeXcount -->} included somewhere to indicate the location where the TeXcount output from the parsing should be inserted.
+
+\option[-tex]Encode \TeX{} special characters for output into \TeX{} code.
+
+\option[-css=]Reference to CSS to be included in the HTML output instead of including the style definition directly in the output.
+
+\option[-cssfile=, -css=file:]File containing style definitions to be included into the HTML output instead of the default styles.
+
+\option[-freq\alt{=\#}]Count individual word frequencies. Optionally, give minimal frequency required to be included in output.
+
+\option[-stat]Produce statistics on language usage, i.e. based on the alphabets and logograms included.
+
+\option[-macrostat, -macrofreq]Produce statistics on package, environment and macro usage. 
+
+\option[-codes]Display an overview of the colour codes. Can be used as a separate option to only display the colour codes, or together with files to parse.
+
+\option[-nocodes]Do not display overview of colour codes.
+
+\option[-opt=, -optionfile=]Reads options (command line parameters) from a specified text file. Should use one option per line. May also include TC options in the same format as specified in \LaTeX{} documents, but prefixed by \code{\%} rather than \code{\%TC:}. Blank lines and lines starting with \code{\#} are ignored; lines starting with \code{\bs{}} are considered to be continuations of the previous line.
+
+\option[-split, -nosplit]The \code{-split} option, which is on by default, speeds up handling of large files by splitting the file into paragraphs. To turn it off, use the \code{-nosplit} option.
+
+\option[-showver, -nover]Include version number in output with \code{-showver}; use \code{-nover} not to show it (default). 
+
+\option[-h, -?, --help, /?]Help.
+
+\option[-h=, -?=, --help=, /?=]Help on particular macro or group name: gives the parsing rule for that macro or group if defined. If the macro or environment is package specific, use \code{-h=\parm{package}:\parm{name}}; replace \code{\parm{package}} with \code{class\%\parm{name}} if it is specific to a document class.
+
+\option[-help-options, -h-opt]Lists all TeXcount options and help on them.
+
+\option[--help-option=, -h-opt=]Lists all TeXcount options containing the provided string: e.g. \code{-h-opt=inc} lists all options containing \code{inc}, while \code{-h-opt=-v} lists all options starting with \code{v}.
+
+\option[-help-style, -h-style]Lists all styles and style categories, i.e. those permitted for use in -v={styles-list}.
+
+\option[-help-style=, -h-style=]Gives description of style or style category. 
+
+\option[-ver, --version]Print version number.
+
+\option[-lic, --license]License information.
+
+\end{description}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_options.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_ruletypes.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/sub_ruletypes.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/sub_ruletypes.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,38 @@
+%%
+%
+\begin{description}
+%PATTERN: \option{}[]{}
+%OUTPUT:  <dt>#1 (keys: #3; formerly code #2)<dt>
+%OUTPUT:  <dd>#x<dd>
+%OUTPUT:
+\def\option#1[#2]#3{\item[#1:] (key: \code{#3} formerly \code{#2})}
+
+\option{Text}[1]{text, word, wd, w}
+ Count as text (i.e. count words).
+\option{Header text}[2]{headertext, headerword, hword, hwd, hw}
+ Count as header text.
+\option{Other text}[3]{otherword, other, oword, owd, ow}
+ Count as float/caption text.
+\option{Displaymath}[7]{displaymath, dsmath, dmath, ds}
+ Count as displayed math formulae.
+\option{Inline math}[6]{inlinemath, inline, imath, eq}
+ Count as inlined math formulae.
+\option{To header}[4]{header, heading, head}
+ Count header, then count text as \code{headertext} (transition state).
+\option{To float}[5]{float, table, figure}
+ Count float, then parse contents as \code{isfloat} (transition state).
+\option{Preamble}[-9]{}
+ Parse as preamble, i.e. ignore text but look for \code{preambleinclude} macros.
+\option{Ignore}[0]{ignore}
+ Ignore text, i.e. do not count, but will still parse the code.
+\option{Float}[-1]{isfloat}
+ Float contents, ignore text but look for \code{floatinclude} macros.
+\option{Strong exclude}[-2]{xx}
+ Strong ignore which ignores environments, e.g. to use in macro definitions where
+ \code{\bs{begin}}--\code{\bs{end}} need not be balanced.
+\option{Stronger exclude}[-3]{xxx}
+ Stronger ignore, handles macros as isolated tokens without handling their parameters,
+ to use with macro definitions like \code{\bs{newcommand}} and \code{\bs{def}}.
+\option{Exclude all}[-4]{xall}
+ Ignore all, including unbalanced braces (e.g. used by \code{\%TC:ignore} and the \code{verbatim} environment). This rule may be used for environment contents, but not for macro or environment parameters or options since the exclusion causes \{ and [ to be ignored.
+\end{description}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_ruletypes.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_tc_other.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/doc/sub_tc_other.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/support/texcount/doc/sub_tc_other.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -0,0 +1,30 @@
+%% Other TC instructions
+
+\begin{description}
+%PATTERN: \option[]
+%OUTPUT:  <dt>#1<dt>
+%OUTPUT:  <dd>#x<dd>
+%OUTPUT:
+
+\option[break \parm{title}]
+Break point which initiates a new subcount. The title is used to identify the following region in the summary output.
+
+\option[incbib \optiontext{or} includebibliography]
+Sets bibliography inclusion, same as running \TeXcount{} with the option \code{-incbib}.
+
+\option[subst \parm{macro} \parm{text}]
+This substitutes a macro with any text. The verbose output will show the substituted text: e.g. \code{\%TC:subst \bs{test} TEST} will cause a following \code{\bs{newcommand}\bs{test}\{TEST\}} to be changed into \code{\bs{newcommand} TEST\{TEST\}}, which \TeXcount{} will interpret differently. Use with care!
+
+\option[ignore]
+Indicates start of a region to be ignored. End region with \code{\%TC:endignore}.
+
+\option[insert \parm{\TeX-code}]
+Insert \TeX{} code for \TeXcount{} to process.
+
+\option[newcounter \parm{name} \opt{description}]
+Define a new counter with the given name and description (optional). A corresponding parsing rule will also be added with the same name.
+
+\option[newtemplate \optiontext{and} template \opt{template-line}]
+Specify a template for the summary output. The first line should just declare a new template using \code{\%TC:newtemplate}, while the subsequent lines use \code{\%TC:template} followed by text specifying the template. The line breaks in the template specification are not of importance: to specify a line break, use \code{\bs{n}}.
+
+\end{description}


Property changes on: trunk/Master/texmf-dist/doc/support/texcount/doc/sub_tc_other.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Deleted: trunk/Master/texmf-dist/doc/support/texcount/macros.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/macros.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/macros.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,86 +0,0 @@
-%% LaTeX macros
-\usepackage{relsize}
-\usepackage[dvipsnames]{xcolor}
-
-% Define underscore to be a regular character in text mode
-\begingroup
-  \catcode`\_=\active
-  \gdef_#1{\ensuremath{\sb{#1}}}
-\endgroup
-\mathcode`\_=\string"8000
-\catcode`\_=12
-
-% Set version number
-\newcommand\version{$3.0$}
-
-\newcommand\copyrightfootnote{
-\footnotetext{Copyright (2008-2013) of Einar Andreas R{\o}dland, distributed
-under the \LaTeX{} Project Public License (LPPL).}
-}
-
-%TC:macroword \TeXcount 1
-\newcommand\TeXcount{{\TeX}count}
-
-% Text formats
-\newcommand\codestyle[1]{\textsf{\color{Blue}#1}}
-\newcommand\code[1]{{\smaller\codestyle{#1}}}
-\newcommand\bigcode[1]{\codestyle{#1}}
-\newcommand\codeline[1]{\begin{quote}\code{#1}\end{quote}}
-\newcommand\bs[1]{\textbackslash#1}
-\newcommand\URL[1]{\textsf{\small #1}}
-
-% Description items: options, parameters, optional parameters
-% These are also interpreted by dos2html.pl
-\def\option[#1]{\item[\bigcode{#1}]\hskip 0pt plus 10pt}
-\def\parm#1{\textit{\color{OliveGreen}#1}}
-\def\opt#1{\parm{[#1]}}
-\def\alt#1{[#1]}
-\def\optiontext#1{\textrm{\bfseries\color{black}#1}}
-
-% Mark off notification in contents for good visibility
-\newcommand\ContentsNote[1]{\addtocontents{toc}{\string\marginpar{\textsf{\color{red}\tiny #1}}}}
-
-% Notabene: margin note
-%TC:macro NB 1
-\newcommand\NB[1]{\ContentsNote{NB}\marginpar{\textsf{\tiny#1}}}
-
-% Mark text as a notification
-%TC:macro NOTE [text]
-\newcommand\NOTE[1]{\textit{\color{red}#1}}
-
-% Mark text for update
-%TC:macro UPDATE [text]
-\newcommand\UPDATE[1]{\ContentsNote{UPDATE}\textit{\textbf{\color{red}This needs to be updated:} {\color{blue}#1}}}
-
-% Mark text as a notification
-%TC:macro TODO [text]
-\newcommand\TODO[1]{\ContentsNote{TODO}\textit{\textbf{\color{red}#1}}}
-
-% BUG
-%TC:macro BUG [text]
-\newcommand\BUG[1]{{\color{red}#1}}
-
-\makeatletter
-
-\renewcommand\@maketitle{%
-\newpage\null\vskip 2em%
-\begin{center}%
-\let\footnote\thanks
-{\LARGE \@title \par}%
-\end{center}%
-\par
-\vskip 1.5em
-}
-
-\renewcommand\abstractname{Abstract}
-\renewenvironment{abstract}{%
-  \begin{center}%
-    {\slshape\bfseries\large\abstractname\vspace{-.5em}\vspace{\z@}}%
-  \end{center}%
-  \vskip 4pt
-  \slshape
-}{
-\vskip 0.5em
-}
-
-\makeatother

Deleted: trunk/Master/texmf-dist/doc/support/texcount/sub_addrules.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/sub_addrules.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/sub_addrules.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,29 +0,0 @@
-% Subsection: TC-commands for adding macro rules
-
-\begin{description}
-\sloppy
-
-\option[macro \parm{macroname} \parm{parameter-rules}]
-Defines macro handling rule for the specified macro. The parameter is on the form \code{[\textit{rule},\ldots]} where each rule is either a keyword indicating the parsing rule for a macro parameter or \code{option:\textit{rule}} for an optional \code{[]}-enclosed parameter. Alternatively, an integer value $n$ indicates that the $n$ first parameters to the macro should be ignored, equivalent to giving a list of $n$ \code{ignore} rules.
-
-\option[envir \parm{envirname} \parm{parameter-rules} \parm{content-rule}]
-(The previously used command, \code{group}, remains an alias for \code{envir}, but the name \code{envir} is more appropriate and therefore recommended.)
-This specifies the handling of environments with the given name. The parameter handling rule, applied to parameters following \code{\bs{begin}\{\textit{name}\}}, are specified as in the \code{macro} instruction. The second parameter specifies the rule, i.e. parser state, with which the contents should be parsed.
-
-\option[macrocount \parm{macroname} \opt{count-spec.}]
-(An alias for \code{macrocount} is \code{macroword}; the preferred name was changed to reflect that this can count any element, not just words.)
-If a number is provided as the count parameter, this defines the given macro to be counted as the specified number of words; if no count is specified, it is assumed to be 1. Alternatively, a \code{[]}-enclosed list of counters can be specified (using the counter keywords), causing each of them to be incremented: counter are \code{word}/\code{text}, \code{headerword}, \code{otherword}, \code{header}, \code{float}, \code{inlinemath}, \code{displaymath} plus a number of aliases.
-
-\option[breakmacro \parm{macroname}]
-Specify that the given macro should cause a break point.
-
-\option[floatinclude \parm{macroname} \parm{parameter-rules}]
-Specify macro handling rules used within float groups. The handling rules are specified as for \code{macro}. Most commonly, the parameter rule will be the \code{otherword}/\code{oword} to specify that words should be counted as \textit{other words}.
-
-\option[preambleinclude \parm{macroname} \parm{parameter-rules}]
-Specifies macro handling rules to be used in the preamble: the text between \code{\bs{documentclass}} and \code{\bs{begin}\{document\}}. The rule is specified like the \code{macro} rules.
-
-\option[fileinclude \parm{macroname} \parm{file-path-spec.}]
-Specifies macros that cause files to be included when \TeXcount{} is run with the \code{-inc} option. The parameters specify the format on which the file path is specified, and can also be used to modify the search path used within the included document.
-
-\end{description}

Deleted: trunk/Master/texmf-dist/doc/support/texcount/sub_options.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/sub_options.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/sub_options.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,142 +0,0 @@
-% Subsection: command line options
-
-\begin{description}
-\sloppy
-\def\option[#1]{\item[\quad\code{#1}]\hskip 0pt plus 10pt}
-\def\alt#1{[#1]}
-
-\option[-v]Verbose (same as \code{-v3}).
-
-\option[-v0]No details (default).
-
-\option[-v1]Prints counted text, marks formulae.
-
-\option[-v2]Also prints ignored text.
-
-\option[-v3]Also includes comments and options.
-
-\option[-v4]Same as \code{-v3 -showstate}.
-
-\option[-v=\ldots, -v\alt{[0-4]}\ldots]Allows detailed control of which elements are included in the verbose output. The provided values is a list of styles or style categories separated by \code{+} or \code{-} to indicate if they should be added or removed from the list of included styles. Style categories start with capital letter and include \code{Words}, \code{Macros}, \code{Options}; the individual styles are in all lower case and include \code{word}, \code{hword}, \code{option}, \code{ignore}.
-
-\option[-showstate]Show internal states (with verbose).
-
-\option[-brief]Only prints a one line summary of the counts for each file.
-
-\option[-q, -quiet]Quiet mode, does not print error messages. Use is discouraged, but it may be useful when piping the output into another application.
-
-\option[-strict]Prints a warning of begin-end groups for which no specific rule is defined.
-
-\option[-total]Only give total sum, no per file sums.
-
-\option[-1]Same as specifying \code{-brief} and \code{-total}, and ensures there will only be one line of output. If used with \code{-sum}, the output will only be the total number.
-
-\option[-0]Same as \code{-1}, i.e. \code{-brief} and \code{-total}, but does not put a line shift at the end. This may be useful when the one line output is to be used by another application, e.g. Emacs, for which the line shift would otherwise need to be stripped away.
-
-\option[-template="\ldots"]Specify an output template which is used to generate the summary output for each file and for the total count. Codes \code{\{\textit{label}\}} is used to include values, where \code{\textit{label}} is one of \code{0} to \code{7} (for the counts), \code{SUM}, \code{ERROR} or \code{TITLE} (first character of label is sufficient). Conditional inclusion is done using \code{\{\textit{label}?\textit{text}?\textit{label}\}} or \code{\{\textit{label}?\textit{if non-zero}|\textit{if zero}?\textit{label}\}}. If the count contains at least two subcounts, use \code{\{SUB|\textit{template}|SUB\}} with a separate template for the subcounts, or \code{\{SUB?\textit{prefix}|\textit{template}|\textit{suffix}?SUB\}}.
-
-\option[-sub\alt{=\ldots}, -subcount\alt{=\ldots}]Generate subcounts. Valid option values are \code{none}, \code{part}, \code{chapter}, \code{section} and \code{subsection} (default), indicating at which level subcounts are generated. (On by default.)
-
-\option[-nosub]Do not generate subcounts.
-
-\option[-sum\alt{=n,n,\ldots}]Produces total sum, default being all words and formulae, but customisable to any weighted sum of the seven counts (list of weights for text words, header words, caption words, headers, floats, inlined formulae, displayed formulae).
-
-\option[-nosum]Do not generate total sum. (Default choice.)
-
-\option[-col]Use ANSI colour codes in verbose output. This requires ANSI colours which is used on Linux, but may not be available under Windows. On by default on non-Windows systems. 
-
-\option[-nc, -nocol]No colours (colours require ANSI). Default under Windows.
-
-\option[-nosep, -noseparator]No separating character/string added after each word in the verbose output (default).
-
-\option[-sep=, -separator=]Separating character or string to be added after each word in the verbose output.
-
-\option[-relaxed]Relaxes the rules for matching words and macro options.
-
-\option[-restricted]Restricts the rules for matching words and macro options.
-
-\option[-]Read \LaTeX{} code from STDIN.
-
-\option[-inc]Parse included files (as separate files).
-
-\option[-merge]Merge included files into document (in place).
-
-\option[-noinc]Do not parse or merge in included files (default).
-
-\option[-incbib]Include bibliography in count, include bbl file if needed.
-
-\option[-nobib]Do not include bibliography in count (default).
-
-\option[-incpackage=]Include rules for a given package.
-
-\option[-dir\alt{=\ldots}]Specify working directory which will serve as root for all include files. The default (\code{-dir=.}) is to use the current directory, i.e. from which \TeXcount{} is executed: the path can be absolute or relative to the current directory. Use \code{-dir} to use the path of the main \LaTeX{} document as working directory.
-
-\option[-auxdir\alt{=\ldots}]Specify the directory of the auxiliary files, e.g. the bibliography (\code{.bbl}) file. The default setting (\code{-auxdir} only) indicates that auxiliary files are in the working directory (from the \code{-dir} or \code{-dir=} option). If \code{-auxdir=} is used to provide a path and \code{-dir=} is used to specify the working directory, the path to the auxiliary directory is take to be relative to the current folder (from which \TeXcount{} is executed); if used with \code{-dir}, the working directory is taken from the path of the parsed file, and the auxiliary directory is taken to be relative to this (unless an absolute path is provided).
-
-\option[-enc=, -encoding=]Specify encoding to use in input (and text output).
-
-\option[-utf8, -unicode]Use UTF-8 (Unicode) encoding. Same as \code{-encoding=utf8}.
-
-\option[-alpha=, -alphabets=]List of Unicode character groups (or digit, alphabetic) permitted as letters. Names are separated by \code{,} or \code{+}. If list starts with \code{+}, the alphabets will be added to those already included. The default is Digit+alphabetic.
-
-\option[-logo=, -logograms=]List of Unicode character groups interpreted as whole word characters, e.g. Han for Chinese characters. Names are separated by \code{,} or \code{+}. If list starts with \code{+}, the alphabets will be added to those already included. By default, this is set to include Ideographic, Katakana, Hiragana, Thai and Lao.
-
-\option[-ch, -chinese, -zhongwen]Turn on Chinese mode in which Chinese characters are counted. I recommend using UTF-8, although \TeXcount{} will also test other encodings (GB2312, Big5, Hz) if UTF-8 fails, and other encodings may be specified by \code{-encoding=}.
-
-\option[-jp, -japanese]Turn on Japanese mode in which Japanese characters (Kanji and Kana) are counted. I recommend using UTF-8, although \TeXcount{} will also test other encodings (e.g. EUC-JP) if UTF-8 fails, and other encodings may be specified by \code{-encoding=}.
-
-\option[-kr, -korean]Turn on Korean mode in which Korean characters (Hangul and Han) are counted. I recommend using UTF-8, although \TeXcount{} will also test other encodings (e.g. EUC-KR) if UTF-8 fails, and other encodings may be specified by \code{-encoding=}.
-
-\option[-kr-words, -korean-words]Korean mode in which Hangul words are counted (i.e. as words separated by spaces) rather than characters. Han characters are still counted as characters. See also \code{-korean}.
-
-\option[-chinese-only, ..., -korean-words-only]As options \code{-chinese}, ..., \code{-korean-words}, but also excludes other alphabets (e.g. letter-based words) and logographic characters.
-
-\option[-char, -letter]Count letters instead of words. This count does not include spaces.
-
-\option[-out=]Send output to file. Takes file name as value.
-
-\option[-html]Output in HTML format.
-
-\option[-htmlcore]Only HTML body contents.
-
-\option[-htmlfile=]File containing a template HTML document with \code{<!-- TeXcount -->} included somewhere to indicate the location where the TeXcount output from the parsing should be inserted.
-
-\option[-tex]Encode \TeX{} special characters for output into \TeX{} code.
-
-\option[-css=]Reference to CSS to be included in the HTML output instead of including the style definition directly in the output.
-
-\option[-cssfile=, -css=file:]File containing style definitions to be included into the HTML output instead of the default styles.
-
-\option[-freq\alt{=\#}]Count individual word frequencies. Optionally, give minimal frequency required to be included in output.
-
-\option[-stat]Produce statistics on language usage, i.e. based on the alphabets and logograms included.
-
-\option[-macrostat, -macrofreq]Produce statistics on package, environment and macro usage. 
-
-\option[-codes]Display an overview of the colour codes. Can be used as a separate option to only display the colour codes, or together with files to parse.
-
-\option[-nocodes]Do not display overview of colour codes.
-
-\option[-opt=, -optionfile=]Reads options (command line parameters) from a specified text file. Should use one option per line. May also include TC options in the same format as specified in \LaTeX{} documents, but prefixed by \code{\%} rather than \code{\%TC:}. Blank lines and lines starting with \code{\#} are ignored; lines starting with \code{\bs{}} are considered to be continuations of the previous line.
-
-\option[-split, -nosplit]The \code{-split} option, which is on by default, speeds up handling of large files by splitting the file into paragraphs. To turn it off, use the \code{-nosplit} option.
-
-\option[-showver, -nover]Include version number in output with \code{-showver}; use \code{-nover} not to show it (default). 
-
-\option[-h, -?, --help, /?]Help.
-
-\option[-h=, -?=, --help=, /?=]Help on particular macro or group name: gives the parsing rule for that macro or group if defined. If the the macro or environment is package specific, use \code{-h=\parm{package}:\parm{name}}; replace \code{\parm{package}} with \code{class\%\parm{name}} if it is specific to a document class.
-
-\option[-help-options, -h-opt]Lists all TeXcount options and help on them.
-
-\option[--help-option=, -h-opt=]Lists all TeXcount options containing the provided string: e.g. \code{-h-opt=inc} lists all options containing \code{inc}, while \code{-h-opt=-v} lists all options starting with \code{v}.
-
-\option[-help-style, -h-style]Lists all styles and style categories, i.e. those permitted used in -v={styles-list}.
-
-\option[-help-style=, -h-style=]Gives description of style or style category. 
-
-\option[-ver, --version]Print version number.
-
-\option[-lic, --license]License information.
-
-\end{description}

Deleted: trunk/Master/texmf-dist/doc/support/texcount/sub_tc_other.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/texcount/sub_tc_other.tex	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/doc/support/texcount/sub_tc_other.tex	2017-09-19 22:01:25 UTC (rev 45338)
@@ -1,28 +0,0 @@
-%% Other TC instructions
-
-\begin{description}
-
-\option[break \parm{title}]
-Break point which initiates a new subcount. The title is used to identify the following region in the summary output.
-
-\option[incbib \optiontext{or} includebibliography]
-Sets bibliography inclusion, same as running \TeXcount{} with the option \code{-incbib}.
-
-\option[subst \parm{macro} \parm{text}]
-This substitutes a macro with any text. The verbose output will show the substituted text: e.g. \code{\%TC:subst \bs{test} TEST} will cause a following \code{\bs{newcommand}\bs{test}\{TEST\}} to be changed into \code{\bs{newcommand} TEST\{TEST\}}, which \TeXcount{} will interpret differently. Use with care!
-
-\option[ignore]
-Indicates start of a region to be ignored. End region with \code{\%TC:endignore}.
-
-\option[insert \parm{\TeX-code}]
-Insert \TeX{} code for \TeXcount{} to process.
-
-\option[newcounter \parm{name} \opt{description}]
-Define a new counter with the given name and description (optional). A corresponding parsing rule will also be added with the same name.
-
-\option[newtemplate \optiontext{and} template \opt{template-line}]
-Specify a template for the summary output. The first line should just declare a new template using \code{\%TC:newtemplate}, while the subsequent lines use \code{\%TC:template} followed by text specifying the template. The line breaks in the template specification are not of importance: to specify a line break, use \code{\bs{n}}.
-
-\end{description}
-
-

Modified: trunk/Master/texmf-dist/scripts/texcount/texcount.pl
===================================================================
--- trunk/Master/texmf-dist/scripts/texcount/texcount.pl	2017-09-19 21:51:42 UTC (rev 45337)
+++ trunk/Master/texmf-dist/scripts/texcount/texcount.pl	2017-09-19 22:01:25 UTC (rev 45338)
@@ -6,17 +6,40 @@
 use Text::Wrap;
 use Term::ANSIColor;
 
-BEGIN {
-  if ($^O=~/^MSWin/) {
+# System variables
+my $terminalwidth;
+
+# Conditional package inclusion
+if ($^O=~/^MSWin/) {
+  eval {
     require Win32::Console::ANSI;
-    Win32::Console::ANSI::->import();
+    import Win32::Console::ANSI;
+  };
+  if ($@) {
+    option_ansi_colours(0);
+    print STDERR "NOTE: Package Win32::Console::ANSI required for colour coded output.\n";
   }
 }
 
+# Terminal or not
+if (-t STDOUT) { # If in terminal
+  eval {
+    require Term::ReadKey;
+    import Term::ReadKey;
+    ($terminalwidth)=GetTerminalSize();
+  };
+} else {
+  option_ansi_colours(0);
+}
+
+if (!defined $terminalwidth) {$terminalwidth=76;}
+elsif ($terminalwidth<60) {$terminalwidth=60;}
+elsif ($terminalwidth>120) {$terminalwidth=120;}
+
 ##### Version information
 
-my $versionnumber="3.0";
-my $versiondate="2013 Jul 29";
+my $versionnumber="3.1";
+my $versiondate="2017 Sep 16";
 
 ###### Set global settings and variables
 
@@ -25,7 +48,7 @@
    ('versionnumber'  => $versionnumber
    ,'versiondate'    => $versiondate
    ,'maintainer'     => 'Einar Andreas Rodland'
-   ,'copyrightyears' => '2008-2013'
+   ,'copyrightyears' => '2008-2017'
    ,'website'        => 'http://app.uio.no/ifi/texcount/'
    );
 
@@ -119,7 +142,7 @@
 my $_STDIN_='<STDIN>'; # File name to represent STDIN (must be '<...>'!)
 
 # CMD specific settings
-$Text::Wrap::columns=76; # Page width for wrapped output
+$Text::Wrap::columns=$terminalwidth; # Page width for wrapped output
 
 ###### Set state identifiers and methods
 
@@ -206,6 +229,7 @@
 my $STATE_EXCLUDE_STRONG=-20;
 my $STATE_EXCLUDE_STRONGER=-30;
 my $STATE_EXCLUDE_ALL=-40;
+my $STATE_SPECIAL_ARGUMENT=-90;
 my $STATE_PREAMBLE=-99;
 my $STATE_TEXT=1;
 my $STATE_TEXT_HEADER=2;
@@ -236,6 +260,7 @@
 add_keys_to_hash(\%key2state,$STATE_EXCLUDE_STRONG,-2,'xx');
 add_keys_to_hash(\%key2state,$STATE_EXCLUDE_STRONGER,-3,'xxx');
 add_keys_to_hash(\%key2state,$STATE_EXCLUDE_ALL,-4,'xall');
+add_keys_to_hash(\%key2state,$STATE_SPECIAL_ARGUMENT,'specarg','spescialarg','specialargument');
 add_keys_to_hash(\%key2state,$_STATE_OPTION,'[',' option',' opt',' optional');
 add_keys_to_hash(\%key2state,$_STATE_NOOPTION,'nooption','nooptions','noopt','noopts');
 add_keys_to_hash(\%key2state,$_STATE_AUTOOPTION,'autooption','autooptions','autoopt','autoopts');
@@ -245,6 +270,7 @@
     $STATE_EXCLUDE_ALL,
     $STATE_EXCLUDE_STRONGER,
     $STATE_EXCLUDE_STRONG,
+    $STATE_SPECIAL_ARGUMENT,
     $STATE_FLOAT,
     $STATE_MATH,
     $STATE_IGNORE,
@@ -281,6 +307,7 @@
     $STATE_EXCLUDE_STRONGER => 'stronger exclude: ignore environments and macro paramters',
     $STATE_EXCLUDE_ALL      => 'exlude all: even {, only scan for end marker',
     $STATE_PREAMBLE         => 'preamble: from \documentclass to \begin{document}',
+    $STATE_SPECIAL_ARGUMENT => 'special macro argument that TeXcount may process further',
     $STATE_TEXT             => 'text: count words',
     $STATE_TEXT_HEADER      => 'header text: count words as header words',
     $STATE_TEXT_FLOAT       => 'float text: count words as float words (e.g. captions)',
@@ -287,8 +314,18 @@
     $STATE_TO_HEADER        => 'header: count header, then count words as header words',
     $STATE_TO_FLOAT         => 'float: count float, then count words as float/other words',
     $STATE_TO_INLINEMATH    => 'inline math: count as inline math/equation',
-    $STATE_TO_DISPLAYMATH   => 'displayed math: count as displayed math/equation');
+    $STATE_TO_DISPLAYMATH   => 'displayed math: count as displayed math/equation',
+    $_STATE_OPTION          => 'rule for [] option follows',
+    $_STATE_NOOPTION        => 'no [] options allowed here',
+    $_STATE_AUTOOPTION      => 'automatic [] option gobbling');
 
+# Short state name for each state for use with -showstates
+my %state2key = ($STATE_PREAMBLE=>'pre');
+for my $key ('x','xx','xxx','xall','w','hw','ow','eq','ds',
+        'head','float','isfloat','ismath','specarg') {
+  $state2key{$key2state{$key}}=$key;
+}
+
 # Parsing state presentation style
 my %state2style=(
     $STATE_TEXT        => 'word',
@@ -330,6 +367,8 @@
 # TODO: Should do a conversion based on STATE values.
 sub state_to_text {
   my $st=shift @_;
+  my $statename = $state2key{$st};
+  if (defined $statename) {$st=$statename;}
   return $st;
 }
 
@@ -350,6 +389,7 @@
   push @countdesc,$desc;
   if (defined $sumweights[$like]) {$sumweights[$cnt]=$sumweights[$like];}
   $key2state{$key}=$state;
+  $state2key{$state}=$key;
   $state2cnt{$state}=$cnt;
   $state2style{$state}='altwd';
   push @STATE_MID_PRIORITY,$state;
@@ -378,18 +418,18 @@
 my $STYLE_EMPTY=' ';
 my $STYLE_BLOCK='-';
 my $NOSTYLE=' ';
-$STYLES{'Errors'}={'error'=>'bold red'};
+$STYLES{'Errors'}={'error'=>'bold red','note'=>'bold white'};
 $STYLES{'Words'}={'word'=>'blue','hword'=>'bold blue','oword'=>'blue','altwd'=>'blue'};
-$STYLES{'Macros'}={'cmd'=>'green','fileinc'=>'bold green'};
+$STYLES{'Macros'}={'cmd'=>'green','fileinc'=>'bold green','special'=>'bold red','specarg'=>'red'};
 $STYLES{'Options'}={'option'=>'yellow','optparm'=>'green'};
 $STYLES{'Ignored'}={'ignore'=>'cyan','math'=>'magenta'};
 $STYLES{'Excluded'}={'exclcmd'=>'yellow','exclenv'=>'yellow','exclmath'=>'yellow','mathcmd'=>'yellow'};
-$STYLES{'Groups'}={'document'=>'red','envir'=>'red','mathgroup'=>'magenta'};
+$STYLES{'Groups'}={'document'=>'bold red','envir'=>'red','mathgroup'=>'magenta'};
 $STYLES{'Comments'}={'tc'=>'bold yellow','comment'=>'yellow'};
 $STYLES{'Sums'}={'cumsum'=>'yellow'};
 $STYLES{'States'}={'state'=>'cyan underline'};
-$STYLES{'<core>'}={%{$STYLES{'Errors'}},$STYLE_EMPTY=>$NOSTYLE,'<printlevel>'=>1};
-$STYLES{0}={%{$STYLES{'Errors'}},'<printlevel>'=>0};
+$STYLES{'<core>'}={%{$STYLES{'Errors'}},$STYLE_EMPTY=>$NOSTYLE,'<printlevel>'=>1,'note'=>'bold white'};
+$STYLES{0}={%{$STYLES{'Errors'}},'<printlevel>'=>0,'note'=>'bold white'};
 $STYLES{1}={%{$STYLES{'<core>'}},%{$STYLES{'Words'}},%{$STYLES{'Groups'}},%{$STYLES{'Sums'}}};
 $STYLES{2}={%{$STYLES{1}},%{$STYLES{'Macros'}},%{$STYLES{'Ignored'}},%{$STYLES{'Excluded'}}};
 $STYLES{3}={%{$STYLES{2}},%{$STYLES{'Options'}},%{$STYLES{'Comments'}},'<printlevel>'=>2};
@@ -398,7 +438,8 @@
 my %STYLE=%{$STYLES{$defaultVerbosity}};
 
 my @STYLE_LIST=('error','word','hword','oword','altwd',
-  'ignore','document','cmd','exclcmd','option','optparm','envir','exclenv',
+  'ignore','document','special','cmd','exclcmd',
+  'option','optparm','envir','exclenv','specarg',
   'mathgroup','exclmath','math','mathcmd','comment','tc','fileinc','state','cumsum');
 my %STYLE_DESC=(
   'error'       => 'ERROR: TeXcount error message',
@@ -408,10 +449,12 @@
   'altwd'       => 'Words in user specified counters: counted in separate counters',
   'ignore'      => 'Ignored text or code: excluded or ignored',
   'document'    => '\documentclass: document start, beginning of preamble',
+  'special'     => 'Special macros, eg require special handling or have side-effects',
   'cmd'         => '\macro: macro not counted, but parameters may be',
   'exclcmd'     => '\macro: macro in excluded region',
   'option'      => '[Macro options]: not counted',
   'optparm'     => '[Optional parameter]: content parsed and styled as counted',
+  'specarg'     => 'Special argument, eg with side-effects',
   'envir'       => '\begin{name}  \end{name}: environment',
   'exclenv'     => '\begin{name}  \end{name}: environment in excluded region',
   'mathgroup'   => '$  $: counted as one equation',
@@ -434,7 +477,7 @@
    mu nu xi pi rho sigma tau upsilon phi chi psi omega
    Gamma Delta Theta Lambda Xi Pi Sigma Upsilon Phi Psi Omega 
    /;
-my $specialchars='\\\\('.join('|',@LetterMacros).')(\{\}|\s*|\b)';
+my $specialchars='\\\\('.join('|',@LetterMacros).')(\{\}|\s+|\b)';
 my $modifiedchars='\\\\[\'\"\`\~\^\=](@|\{@\})';
 my %NamedLetterPattern;
 $NamedLetterPattern{'restricted'}='@';
@@ -459,7 +502,7 @@
 # a macro.
 my %NamedMacroOptionPattern;
 $NamedMacroOptionPattern{'default'}='\[[^\[\]\n]*\]';
-$NamedMacroOptionPattern{'relaxed'}='\[[^\[\]\n]*(\n[^\[\]\n]+)\n?\]';
+$NamedMacroOptionPattern{'relaxed'}='\[\n?([^\[\]\n]\n?)*\]';
 $NamedMacroOptionPattern{'restricted'}='\[(\w|[,\-\s\~\.\:\;\+\?\*\_\=])*\]';
 my $MacroOptionPattern=$NamedMacroOptionPattern{'default'};
 
@@ -538,9 +581,12 @@
 ###### Define core rules
 
 ### Macros indicating package inclusion
-# Will always be assumed to take one parameter (plus options).
+# Will always be assumed to take one extra parameter which is the list of
+# packages. Macro handling rule indicates parameters ignored prior to that.
 # Gets added to TeXmacro. After that, values are not used, only membership.
-my %TeXpackageinc=('\usepackage'=>1,'\RequirePackage'=>1);
+# Handling is otherwise hard-coded rather than rule based.
+my %TeXpackageinc;
+add_keys_to_hash(\%TeXpackageinc,['[','ignore','specialargument'],'\usepackage','\RequirePackage');
 
 ### Macros that are counted within the preamble
 # The preamble is the text between \documentclass and \begin{document}.
@@ -609,7 +655,7 @@
     '\setlength','\addtolength','\settodepth','\settoheight','\settowidth','\setcounter',
     '\addtocontents','\addtocounter',
     '\fontsize');
-add_keys_to_hash(\%TeXmacro,3,'\multicolumn','\addcontentsline');
+add_keys_to_hash(\%TeXmacro,3,'\addcontentsline');
 add_keys_to_hash(\%TeXmacro,6,'\DeclareFontShape');
 add_keys_to_hash(\%TeXmacro,['[','text','ignore'],
     '\cite','\nocite','\citep','\citet','\citeauthor','\citeyear','\citeyearpar',
@@ -678,6 +724,7 @@
 
 ### Convert state keys to codes
 convert_hash(\%TeXpreamble,\&keyarray_to_state);
+convert_hash(\%TeXpackageinc,\&keyarray_to_state);
 convert_hash(\%TeXfloatinc,\&keyarray_to_state);
 convert_hash(\%TeXmacro,\&keyarray_to_state);
 convert_hash(\%TeXmacrocount,\&keyarray_to_cnt);
@@ -822,6 +869,7 @@
 ###### Main script
 
 
+
 ###################################################
 
 MAIN(@ARGV);
@@ -837,8 +885,8 @@
 # MAIN ROUTINE: Handle arguments, then parse files
 sub MAIN {
   my @args;
- push @args,@StartupOptions;
- push @args,@_;
+  push @args,@StartupOptions;
+  push @args,@_;
   Initialise();
   Check_Arguments(@args);
   my @toplevelfiles=Parse_Arguments(@args);
@@ -875,36 +923,46 @@
 # Check arguments, exit on exit condition
 sub Check_Arguments {
   my @args=@_;
-  my $arg=$args[0];
   if (!@args) {
     print_version();
-    print_short_help();
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))$/) {
     print_help();
     exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))=(.*)$/) {
-    print_help_on_rule($4);
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(opt|options?)$/) {
-    print_syntax();
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(opt|options?)=(.*)$/) {
-    print_syntax_subset($5);
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(styles?)$/) {
-    print_help_on_styles();
-    exit;
-  } elsif ($arg=~/^(--?(h|\?|help)|\/(\?|h))-?(styles?)=(\w+)$/) {
-    print_help_on_styles($5);
-    exit;
-  } elsif ($arg=~/^--?(ver|version)$/) {
-    print_version();
-    exit;
-  } elsif ($arg=~/^--?(lic|license|licence)$/) {
-    print_license();
-    exit;
   }
+  for my $arg (@args) {
+    $arg=~s/^(--?(h|\?|help)|\/(\?|h))\b/-h/;
+    $arg=~s/[=:]/=/;
+    if ($arg=~/^-h$/) {
+      print_help();
+      exit;
+    } elsif ($arg=~/^-(h-)?(man|manual)$/) {
+      print_help_man();
+      exit;
+    } elsif ($arg=~/^-h-?(opt|options?)$/) {
+      print_help_options();
+      exit;
+    } elsif ($arg=~/^-h-?(opt|options?)=(.*)$/) {
+      print_help_options_subset($2);
+      exit;
+    } elsif ($arg=~/^-h(-rule)?=(.*)$/) {
+      print_help_on_rule($2);
+      exit;
+    } elsif ($arg=~/^-h-styles?$/) {
+      print_help_on_styles();
+      exit;
+    } elsif ($arg=~/^-h-styles?=(\w+)$/) {
+      print_help_on_styles($1);
+      exit;
+    } elsif ($arg=~/^-h-(tc|(tc)?inst(ructions?))?$/) {
+      print_help_tcinst();
+      exit;
+    } elsif ($arg=~/^--?(ver|version)$/) {
+      print_version();
+      exit;
+    } elsif ($arg=~/^--?(lic|license|licence)$/) {
+      print_license();
+      exit;
+    }
+  }
   return 1;
 }
 
@@ -913,12 +971,15 @@
   my @args=@_;
   my @files;
   foreach my $arg (@args) {
-    if (parse_option($arg)) {next;}
     if ($arg=~/^\-/) {
-      print "Invalid opton $arg \n\n";
-      print_short_help();
+      $arg=~s/[=:]/=/;
+      if (parse_option($arg)) {next;}
+      print "Invalid option $arg \n\n";
+      print_help();
       exit;
-    }
+    } elsif ($arg=~/^@\-/) { # ignored option
+      next;
+    } 
     $arg=~s/\\/\//g;
     push @files,$arg;
   }
@@ -1003,8 +1064,9 @@
   my $arg=shift @_;
   if (!defined $arg) {
     @sumweights=(0,1,1,1,0,0,1,1);
-  } elsif ($arg=~/^(\d+(\.\d*)?(,\d+(\.\d*)?){0,6})$/) {
-    @sumweights=(0,split(',',$1));
+  } elsif ($arg=~/^(\d+(\.\d*)?([,+]\d+(\.\d*)?){0,6})$/) {
+    @sumweights=(0,split(/[,+]/,$arg));
+    print STDERR "SUMWEIGHTS: ",join(', ',@sumweights),"\n";
   } else {
     print STDERR "Warning: Option value $arg not valid, ignoring option.\n";
   }
@@ -1039,7 +1101,11 @@
   elsif ($arg eq '-nover') {$showVersion=-1;}
   elsif ($arg =~/^-nosep(arator)?s?$/ ) {$separator='';}
   elsif ($arg =~/^-sep(arator)?s?=(.*)$/ ) {$separator=$2;}
-  elsif ($arg =~/^-out=(.*)/ ) {close STDOUT; open STDOUT,'>',$1;}
+  elsif ($arg =~/^-out=(.*)/ ) {
+    close STDOUT;
+    open STDOUT,'>',$1 or die "Could not open out file for writing: $1";
+  }
+  elsif ($arg =~/^-out-stderr/ ) {select STDERR;}
   else {return 0;}
   return 1;
 }
@@ -1099,10 +1165,11 @@
 # Parse option file TC options
 sub __optionfile_tc {
   my $arg=shift @_;
+  if ($arg=~/^\%\%/) {return 1;}
   $arg=~s/^\%\s*// || return 0;
   if ($arg=~/^subst\s+(\\\w+)\s+(.*)$/i) {
     $substitutions{$1}=$2;
-  } elsif ($arg=~/^(\w+)\s+([\\]*\w+)\s+([^\s\n]+)(\s+([0-9]+))?/i) {
+  } elsif ($arg=~/^(\w+)\s+([\\]*\w+)\s+([^\s\n]+)(\s+(\-?[0-9]+|\w+))?/i) {
     tc_macro_param_option($Main,$1,$2,$3,$5) || die "Invalid TC option: $arg\n";
   } else {
     print "Invalid TC option format: $arg\n";
@@ -1115,7 +1182,9 @@
 sub Parse_file_list {
   my @files=@_;
   my $listtotalcount=new_count('Total');
-  foreach (@files) {s/\\/\//g; s/ /\\ /g;}
+  foreach (@files) {
+    $_='"'.$_.'"'
+  }
   if (@files) {
     @files=<@files>; # For the sake of Windows: expand wildcards!
     for my $file (@files) {
@@ -1224,7 +1293,7 @@
   my $tex=shift @_;
   my $file=shift @_;
   foreach my $path (@_) {
-    if (!$path=~/[\\\/]$/) {$path.='/';}
+    if ($path && $path!~/[\\\/]$/) {$path.='/';}
     my $filepath=$path.$file;
     if (-e $filepath) {return $filepath;}
     elsif ($filepath=~/\.tex$/i) {}
@@ -1372,6 +1441,13 @@
     $countdesc[2]='Letters in headers';
     $countdesc[3]='Letters in captions';
     return 'letter';
+  } elsif ($language=~/^all-nonspace-(char|character|letter)s?$/) {
+    @WordPatterns=($NamedWordPattern{'letters'});
+    @AlphabetScripts=qw/Digit Is_alphabetic Is_punctuation/;
+    $countdesc[1]='Characters in text';
+    $countdesc[2]='Characters in headers';
+    $countdesc[3]='Characters in captions';
+    return 'nonspace-characters';
   } else {
     return undef;
   }
@@ -1919,6 +1995,36 @@
 ###### Error handling
 
 
+# Print note to output
+sub note {
+  my ($tex,$level,$text,$prefix,$style)=@_;
+  if ($printlevel>=$level) {
+    $prefix=(defined $prefix)?$prefix:'%NOTE: ';
+    $style=(defined $style)?$style:'note';
+    $text=count_in_template($tex->{'subcount'},$text);
+    flush_next($tex);
+    line_return(0,$tex);
+    print_style($prefix.$text,$style);
+    flush_next($tex);
+    $blankline=-1;    
+  }
+}
+
+# Compare count with expected and note if assertion fails
+sub assertion_note {
+  my ($tex,$checktext,$template)=@_;
+  my $count=$tex->{'subcount'};
+  my @check=split(/,/,$checktext);
+  for (my $i=scalar @check;$i>0;$i--) {
+    if ($check[$i-1] ne get_count($count,$i)) {
+      my $msg=$template.' [expected:'.join(',',@check).']';
+      note($tex,0,$msg,'%ASSERTION FAILED: ','error');
+      return 1;
+    }
+  }
+  return 0;
+}
+
 # Add warning to list of registered warnings (optionally to be reported at the end)
 sub warning {
   my ($tex,$text)=@_;
@@ -2012,11 +2118,12 @@
     $simple_token=1;
   }
   my $next;
+  my @specarg;
   while (defined ($next=next_token($tex,$simple_token))) {
     # Parse next token until token matches $end
     set_style($tex,'ignore');
     if ($state==$STATE_MATH) {set_style($tex,'math');}
-    if ((defined $end) && ($end eq $next)) {return;}
+    if ((defined $end) && ($end eq $next)) {return @specarg;}
     # Determine how token should be interpreted
     if ($state==$STATE_PREAMBLE && $next eq '\begin' && $tex->{'line'}=~/^\{\s*document\s*\}/) {
       # \begin{document}
@@ -2028,9 +2135,19 @@
     } elsif ($tex->{'type'}==$TOKEN_SPACE) {
       # space or other code that should be passed through without styling
       flush_next($tex,' ');
+    } elsif ($next eq '{') {
+      # {...} group
+      set_style($tex,'ignore');
+      push @specarg,_parse_unit($tex,$state,'}');
+      set_style($tex,'ignore');
+    } elsif ($next eq '}') {
+      error($tex,'Encountered } without corresponding {.');
     } elsif ($tex->{'type'}==$TOKEN_TC) {
       # parse TC instructions
       _parse_tc($tex,$next);
+    } elsif ($state==$STATE_SPECIAL_ARGUMENT) {
+      set_style($tex,'specarg');
+      push @specarg,$next;
     } elsif ($tex->{'type'}==$TOKEN_WORD) {
       # word
       if (my $cnt=state_text_cnt($state)) {
@@ -2038,13 +2155,6 @@
         inc_count($tex,$cnt);
         set_style($tex,state_to_style($state));
       }
-    } elsif ($next eq '{') {
-      # {...} group
-      set_style($tex,'ignore');
-      _parse_unit($tex,$state,'}');
-      set_style($tex,'ignore');
-    } elsif ($next eq '}') {
-      error($tex,'Encountered } without corresponding {.');
     } elsif ($state==$STATE_EXCLUDE_STRONGER) {
       # ignore remaining tokens
       set_style($tex,'ignore');
@@ -2053,7 +2163,7 @@
       set_style($tex,'document');
       _parse_documentclass_params($tex);
       while (!($tex->{'eof'})) {
-        _parse_unit($tex,$STATE_PREAMBLE);
+        push @specarg,_parse_unit($tex,$STATE_PREAMBLE);
       }
     } elsif ($tex->{'type'}==$TOKEN_MACRO) {
       # macro call
@@ -2072,15 +2182,16 @@
       # handle as parameter that should not be counted
       set_style($tex,'ignore');
     }
-    if (!defined $end) {return;}
+    if (!defined $end) {return @specarg;}
   }
   defined $end && error($tex,'Reached end of file while waiting for '.$end.'.');
+  return @specarg;
 }
 
 # Print state
 sub _set_printstate {
   my ($tex,$state,$end)=@_;
-  $tex->{'printstate'}=':'.state_to_text($state).':'.(defined $end?$end.':':'');
+  $tex->{'printstate'}=':'.state_to_text($state).(defined $end?'>'.$end:'').':';
   flush_next($tex);
 }
 
@@ -2102,6 +2213,7 @@
     next_subcount($tex,$label);
   }
   if ($state==$STATE_MATH) {set_style($tex,'mathcmd');}
+  elsif ($state==$STATE_SPECIAL_ARGUMENT) {set_style($tex,'specarg');}
   else {set_style($tex,state_is_text($state)?'cmd':'exclcmd');}
   if ($next eq '\begin' && state_inc_envir($state)) {
     _parse_envir($tex,$state);
@@ -2111,8 +2223,10 @@
     push @macro,$STRING_ERROR;
   } elsif ($next eq '\verb') {
     _parse_verb_region($tex,$state);
-  } elsif (state_is_parsed($state) && defined $TeXpackageinc{$next} ) {
-    _parse_include_package($tex);
+  } elsif (state_is_parsed($state) && defined (my $substat=$TeXpackageinc{$next})) {
+    # Parse macro parameters, use _parse_include_argument to process package list
+    set_style($tex,'document');
+  	push @macro,__gobble_macro_parms($tex,$substat,$__STATE_NULL,\&_parse_include_argument);
     push @macro,'<package>';
   } elsif (state_is_parsed($state) && defined (my $def=$TeXfileinclude{$next})) {
     # include file (merge in or queue up for parsing)
@@ -2183,7 +2297,7 @@
   } elsif ($instr eq 'insert') {
     $tex->{'line'}="\n".$next.$tex->{'line'};
   } elsif ($instr eq 'subst') {
-    if ($next=~/^(\\\S+)\s+(.*)$/) {
+    if ($next=~/^(\S+)\s*(\S.*)?$/) {
       my $from=$1;
       my $to=$2;
       $substitutions{$from}=$to;
@@ -2192,12 +2306,20 @@
       error($tex,'Invalid %TC:subst format.');
     }
   } elsif ($instr eq 'newcounter') {
-    assert($next=~s/^(\w+)(=(\w+))?\s*//,$tex,'Should have format %TC:newcounter {key}[={like-key}] {description}')
+    assert($next=~s/^(\w+)(=(\w+))?\s*//,$tex,'Expected format: %TC:newcounter {key}[={like-key}] {description}')
     || return;
     my $key=$1;
     my $like=$3;
     if ($next eq '') {$next=$key;}
     add_new_counter($key,$next,$like);
+  } elsif ($instr eq 'log') {
+    assert($next=~s/^(.*)$//,$tex,'Expected format: %TC:log {text or template}') || return;
+    note($tex,1,$1);
+  } elsif ($instr eq 'assert') {
+    assert($next=~s/^(\d+(,\d+)*)(\s+(.*))?$//,$tex,'Expected format: %TC:assert count+count+... {text or template}')
+    || return;
+    my $template=$4 || 'Words counted: {w} in text, {hw} in headers, {ow} other.';
+    assertion_note($tex,$1,$template);
   } elsif ($next=~/^([\\]*\S+)\s+([^\s]+)(\s+(-?\w+))?/) {
     # %TC:instr macro param option
     my $macro=$1;
@@ -2337,7 +2459,7 @@
       if ($param eq 'file') {$file=$2;}
       elsif ($param eq 'texfile') {
         $file=$2;
-        if (!$file=~/\.tex$/i) {$file.='.tex';}
+        if ($file!~/\.tex$/i) {$file.='.tex';}
       }
       else {$params{$param}=$2;}
     }
@@ -2364,7 +2486,6 @@
 sub _parse_include_package {
   my ($tex)=@_;
   set_style($tex,'document');
-  __gobble_option($tex);
   if ( $tex->{'line'}=~s/^\{(([\w\-]+)(\s*,\s*[\w\-]+)*)\}// ) {
     print_style("{$1}",'document');
     foreach (split(/\s*,\s*/,$1)) {
@@ -2377,6 +2498,17 @@
   }
 }
 
+# Extract package names from token list and include packages
+sub _parse_include_argument {
+  my $tex=shift @_;
+  my $args=join('',@_);
+  set_style($tex,'document');
+  foreach (split(/\s*,\s*/,$args)) {
+    $MacroUsage{"<package:$_>"}++;
+    include_package($_,$tex);
+  }
+}
+
 # Parse \documentclass parameters and include rules
 sub _parse_documentclass_params {
   my ($tex)=@_;
@@ -2445,7 +2577,7 @@
 
 # Gobble macro parameters as specified in parm plus options
 sub __gobble_macro_parms {
-  my ($tex,$parm,$oldstat)=@_;
+  my ($tex,$parm,$oldstat,$specarghandler)=@_;
   my $n;
   my @ret;
   if (ref($parm) eq 'ARRAY') {
@@ -2477,7 +2609,10 @@
       # Parse macro parameter
       if ($auto_gobble_options) {push @ret,__gobble_options($tex);}
       push @ret,$STRING_PARAMETER;
-      _parse_unit($tex,__new_state($p,$oldstat),$_PARAM_);
+      my @specarg=_parse_unit($tex,__new_state($p,$oldstat),$_PARAM_);
+      if ($p==$STATE_SPECIAL_ARGUMENT && defined $specarghandler) {
+        &$specarghandler($tex,@specarg);
+      }
     }
   }
   #TODO: Drop default gobbling of option at end?
@@ -2647,7 +2782,9 @@
   my $count=shift @_;
   my $sum=0;
   for (my $i=scalar(@sumweights);$i-->1;) {
-    $sum+=get_count($count,$i)*$sumweights[$i];
+    if ($sumweights[$i]) {
+      $sum+=get_count($count,$i)*$sumweights[$i];
+    }
   }
   return $sum;
 }
@@ -2928,6 +3065,7 @@
 # Print count summary for a count object
 sub print_count {
   my ($count,$class)=@_;
+  line_return(0);
   if ($htmlstyle) {print "<div class='".($class||'count')."'>\n";}  
   if ($outputtemplate) {
     _print_count_template($count,$outputtemplate);
@@ -3033,8 +3171,8 @@
   __print_count_using_template($count,$template);
 }
 
-# Print counts using template
-sub __print_count_using_template {
+# Return string with counts based on template
+sub count_in_template {
   my ($count,$template)=@_;
   while (my ($key,$cnt)=each %key2cnt) {
     $template=__process_template($template,$key,get_count($count,$cnt));
@@ -3046,9 +3184,15 @@
   $template=__process_template($template,'SUM',get_sum_count($count));
   $template=__process_template($template,'TITLE',$count->{'title'}||'');
   $template=__process_template($template,'SUB',number_of_subcounts($count));
-  print $template;
+  $template=~s/\a//gis;
+  return $template;
 }
 
+# Print counts using template
+sub __print_count_using_template {
+  print count_in_template(@_);
+}
+
 # Print subcounts using template
 sub __print_subcounts_using_template {
   my ($count,$template)=@_;
@@ -3070,7 +3214,7 @@
     $template=~s/\{($label)\?(.*?)\?(\1)\}//gis;
   }
   if (!defined $value) {$value='';}
-  $template=~s/\{($label)\}/$value/gis;
+  $template=~s/\{($label)\}/$value\a/gis;
   return $template;
 }
 
@@ -3114,6 +3258,7 @@
     } elsif ($tex->{'line'}=~s/^([ \t\f]+)//) {
       if ($prt) {print $1;}
     }
+    if ($tex->{'line'}=~/^\%TC:/i) {return;}
     if ($tex->{'line'}=~s/^(\%+[^\r\n]*)//) {
       print_style($1,'comment');
       $ret=1;
@@ -3204,11 +3349,6 @@
   wprintstringdata('Version');
 }
 
-# Print TeXcount reference text
-sub print_reference {
-  wprintstringdata('Reference');
-}
-
 # Print TeXcount licence text
 sub print_license {
   wprintstringdata('License');
@@ -3215,18 +3355,30 @@
 }
 
 # Print short TeXcount help
-sub print_short_help {
+sub print_help {
   wprintstringdata('ShortHelp');
 }
 
+# Print main TeXcount help
+sub print_help_man {
+  wprintstringdata('HelpTitle');
+  wprintstringdata('HelpText');
+  wprintstringdata('Reference');
+}
+
+# Print help on TC instructions
+sub print_help_tcinst {
+  wprintstringdata('TCinstructions');
+}
+
 # Print TeXcount options list
-sub print_syntax {
+sub print_help_options {
   wprintstringdata('OptionsHead');
-  wprintstringdata('Options','@ -          :');
+  wprintstringdata('Options',StringDatum('OptionsFormat'));
 }
 
-# Prinst TeXcount options containing substring
-sub print_syntax_subset {
+# Print TeXcount options containing substring
+sub print_help_options_subset {
   my $pattern=shift @_;
   my $data=StringData('Options');
   if (!defined $data) {
@@ -3240,29 +3392,10 @@
   if (scalar(@options)==0) {print "No options contained $pattern.\n";}
   else {
     print "Options containing \"$pattern\":\n\n";
-    wprintlines('@ -          :',@options);
+    wprintlines(StringDatum('OptionsFormat'),@options);
   }
 }
 
-# Print complete TeXcount help
-sub print_help {
-  print_help_title();
-  print_syntax();
-  print_help_text();
-  print_reference();
-}
-
-# Print help title 
-sub print_help_title {
-  wprintstringdata('HelpTitle');
-}
-
-# Print help text
-sub print_help_text {
-  wprintstringdata('HelpText');
-  wprintstringdata('TCinstructions');
-}
-
 # Print help on specific macro or environment
 sub print_help_on_rule {
   my $arg=shift @_;
@@ -3326,16 +3459,20 @@
 # Print macro handling rule
 sub _print_rule_macro {
   my ($arg,$def)=@_;
-  if (ref($def) eq 'ARRAY') {
+  if (!defined $def) {
+    print "Takes no parameter(s).\n";
+  } elsif (ref($def) eq 'ARRAY') {
     my $optionflag=0;
-    print "Takes the following parameter(s):\n";
+    print "Takes has the following parameters and parameter rules:\n";
     foreach my $state (@{$def}) {
       if ($state==$_STATE_OPTION) {$optionflag=1;}
+      elsif ($state==$_STATE_NOOPTION) {print " - no [] options permitted here\n";}
+      elsif ($state==$_STATE_AUTOOPTION) {}
       elsif ($optionflag) {
         $optionflag=0;
-        print " - Optional [] containing $state2desc{$state}\n";
+        print " + optional [] containing $state2desc{$state}\n";
       } else {
-        print " - $state2desc{$state}\n";
+        print " + $state2desc{$state}\n";
       }
     }
   } else {
@@ -3348,7 +3485,9 @@
   my ($arg,$def)=@_;
   print "Contents parsed as $state2desc{$def}\n";
   if ($def=$TeXmacro{$PREFIX_ENVIR.$arg}) {
-    _print_rule_macro($def);
+    _print_rule_macro($arg,$def);
+  } else {
+    print "Takes no parameter(s).\n";
   }
 }
 
@@ -3484,6 +3623,8 @@
 .mathcmd {color: #6c0;}
 .ignore {color: #999;}
 .exclenv {color:#c66;}
+.special {color:#c66; font-weight: bold;}
+.specarg {color:#c66; font-weight: bold; font-style: italic;}
 .tc {color: #999; font-weight:bold;}
 .comment {color: #999; font-style: italic;}
 .state {color: #990; font-size: 70%;}
@@ -3490,6 +3631,7 @@
 .cumsum {color: #999; font-size: 80%;}
 .fileinc {color: #696; font-weight:bold;}
 .warning {color: #c00; font-weight: 700;}
+.note {color: #c90; font-weight: bold;}
 
 div.filegroup, div.parse, div.stylehelp, div.count, div.sumcount, div.error {
    border: solid 1px #999; margin: 4pt 0pt; padding: 4pt;
@@ -3546,6 +3688,11 @@
   return STRINGDATA()->{$name};
 }
 
+# First line of StringData
+sub StringDatum {
+  return pop @{StringData(@_)};
+}
+
 # Insert value from GLOBALDATA
 sub __apply_globaldata {
   my $name=shift @_;
@@ -3607,9 +3754,9 @@
   my $ind2=6;
   my $i;
   foreach my $line (@lines) {
-    if ($line=~s/^@//) {
-      $ind2=1+index($line,':');
-      $ind1=1+index($line,'-');
+    if ($line=~s/^@/ /) {
+      $ind1=index($line,'-');
+      $ind2=index($line,':');
       if ($ind1<1) {$ind1=$ind2;}
       next;
     }
@@ -3616,7 +3763,7 @@
     my $firstindent=0;
     if ($line=~s/^(\t|\s{2,})(\S)/$2/) {$firstindent=$ind1;}
     my $indent=$firstindent;
-    if ($line=~/^(.*\S)(\t|\s{2,})(.*)$/) {
+    if ($line=~/^(.*?\S)(\t|\s{2,})(.*)$/) {
       $indent=$ind2;
       if ($1 eq '|') {$line=' ';}
       else {$line=$1.'   ';}
@@ -3639,12 +3786,12 @@
 TeXcount version ${versionnumber}, ${versiondate}.
 
 :::::::::: Reference
-The TeXcount script is copyright of ${maintainer} (${copyrightyears}) and published under the LaTeX Project Public Licence.
-
 Go to the TeXcount web page
     ${website}
-for more information about the script, e.g. news, updates, help, usage tips, known issues and short-comings, or to access the script as a web application. Feedback such as problems or errors can be reported to einarro@ifi.uio.no.
+for more help and information about the script: news, updates, help, usage tips, known issues and short-comings, or to access the script as a web application. Feedback such as problems or errors can be reported to einarro@ifi.uio.no.
 
+The TeXcount script is copyright of ${maintainer} (${copyrightyears}) and published under the LaTeX Project Public Licence.
+
 :::::::::: License
 TeXcount version ${versionnumber}
   
@@ -3660,8 +3807,22 @@
 :::::::::: ShortHelp
 Syntax: texcount.pl [options] files
 
-Use option -help (or just -h) to get help; -help-options (-hopt) to get list of command line options, or -help-options=substring for help on all options containing substring.
+Use option -help (or just -h) to get help. For more detailed help, the following alternatives exist:
+@ -                      :
+  -help-man, -man          Manual with more extensive help
+  -help-rule={macro/envir}    Macro/environment handling rule (backslash needed with macros)       
+  -help-options (-hopt)    Get list of command line options
+  -help-options={substring}    Help on options containing substring
+  -help-styles             List styles which determine how different elements (words, macros, etc) are presented in the verbose output
+  -help-style={style}      Describe a particular style or style category
+  -help-tc, -help-instructions    Help on %TC:instruction for inserting TeXcount instructions into the TeX code.
 
+Help, documentation, FAQ and updates are available from the TeXcount web page:
+    ${website}
+or through running
+    texdoc texcount
+on the command line.
+
 ::::::::::::::::::::::::::::::::::::::::
 :::::::::: HelpTitle
 ***************************************************************
@@ -3671,6 +3832,18 @@
 Count words in TeX and LaTeX files, ignoring macros, tables, formulae, etc.
 
 ::::::::::::::::::::::::::::::::::::::::
+:::::::::: HelpText
+The script counts words as either words in the text, words in headers/titles or words in floats (figure/table captions). Macro options (i.e. \\macro[...]) are ignored; macro parameters (i.e. \\macro{...}) are counted or ignored depending on the macro, but by default counted. Begin-end groups are by default ignored and treated as 'floats', though some (e.g. center) are counted.
+
+Mathematical formulae are not counted as words, but are instead counted separately with separate counts for inlined formulae and displayed formulae. Similarly, the number of headers and the number of 'floats' are counted. Note that 'float' is used here to describe anything defined in a begin-end group unless explicitly recognized as text or mathematics.
+
+The verbose options (-v1, -v2, -v3, showstate) produces output indicating how the text has been interpreted. Check this to ensure that words in the text has been interpreted as such, whereas mathematical formulae and text/non-text in begin-end groups have been correctly interpreted.
+
+Summary, as well as the verbose output, may be produced as text (default) or as HTML code using the -html option. The HTML may then be sent to file which may be viewed with you favourite browser.
+
+Under UNIX, unless -nocol (or -nc) has been specified, the output will be colour coded using ANSI colour codes. Counted text is coloured blue with headers are in bold and in HTML output caption text is italicised. Use 'less -r' instead of just 'less' to view output: the '-r' option makes less treat text formating codes properly. Windows does not support ANSI colour codes, and so this is turned off by default.
+
+::::::::::::::::::::::::::::::::::::::::
 :::::::::: OptionsHead
 
 Syntax: texcount.pl [options] files
@@ -3677,7 +3850,7 @@
 
 Options:
 
-:::::::::: OptionsPrefix
+:::::::::: OptionsFormat
 @ -          :
 :::::::::: Options
   -relaxed      Uses relaxed rules for word and option handling: i.e. allows more general cases to be counted as either words or macros.
@@ -3738,10 +3911,13 @@
   -codes        Display output style code overview and explanation. This is on by default.
   -nocodes      Do not display output style code overview.
   -out=         Write output to file, give filename as option value.
+  -out-stderr   Write output to STDERR instead of STDOUT.
   -h, -?, -help, /?    Help text.
-  -h=, -?=, -help=, /?=    Takes a macro or group name as option and returns a description of the rules for handling this if any are defined. If handling rule is package specific, use -incpackage=package name: -incpackage must come before -h= on the command line to take effect.
+  -help-man, -man    Short manual.
+  -h=, -help-rule=    Takes a macro or group name as option and returns a description of the rules for handling this if any are defined. If handling rule is package specific, use -incpackage=package name: -incpackage must come before -h= on the command line to take effect.
   -help-options, -h-opt    List all options.
   -help-options=, -h-opt=   List all options containing the provided string, e.g. -h-opt=dir or -h-opt=-v (the initial - in -v causes only options starting with v to be listed).
+  -help-tc, -help-inst    List all TeXcount instructions insertable as %TC comments in the TeX document.
   -help-style   List the styles and style categories: i.e. those permitted used with -v={styles-list}.
   -help-style=   Give description of style or style category.
   -ver, -version    Print version number.
@@ -3748,17 +3924,6 @@
   -lic, -license, -licence    Licence information.
 
 ::::::::::::::::::::::::::::::::::::::::
-:::::::::: HelpText
-The script counts words as either words in the text, words in headers/titles or words in floats (figure/table captions). Macro options (i.e. \\macro[...]) are ignored; macro parameters (i.e. \\macro{...}) are counted or ignored depending on the macro, but by default counted. Begin-end groups are by default ignored and treated as 'floats', though some (e.g. center) are counted.
-
-Mathematical formulae are not counted as words, but are instead counted separately with separate counts for inlined formulae and displayed formulae. Similarly, the number of headers and the number of 'floats' are counted. Note that 'float' is used here to describe anything defined in a begin-end group unless explicitly recognized as text or mathematics.
-
-The verbose options (-v1, -v2, -v3, showstate) produces output indicating how the text has been interpreted. Check this to ensure that words in the text has been interpreted as such, whereas mathematical formulae and text/non-text in begin-end groups have been correctly interpreted.
-
-Summary, as well as the verbose output, may be produced as text (default) or as HTML code using the -html option. The HTML may then be sent to file which may be viewed with you favourite browser.
-
-Under UNIX, unless -nocol (or -nc) has been specified, the output will be colour coded using ANSI colour codes. Counted text is coloured blue with headers are in bold and in HTML output caption text is italicised. Use 'less -r' instead of just 'less' to view output: the '-r' option makes less treat text formating codes properly. Windows does not support ANSI colour codes, and so this is turned off by default.
-
 :::::::::: TCinstructions
 Parsing instructions may be passed to TeXcount using comments in the LaTeX files on the format
 @ -      :
@@ -3766,14 +3931,14 @@
 and are used to control how TeXcount parses the document. The following instructions are used to set parsing rules which will apply to all subsequent parsing (including other files):
   %TC:macro [macro] [param.states]
     |    macro handling rule, no. of and rules for parameters
-  %TC:macroword [macro] [number]
-    |    macro counted as a given number of words
+  %TC:macrocount [macro] [number]
+    |    macro counted as a given number of words (alternative: %TC:macroword)
   %TC:header [macro] [param.states]
-    |    header macro rule, as macro but counts as one header
+    |    header macro rule, as macro but counts as one header (deprecated, use instead: %TC:macro \macro [header])
   %TC:breakmacro [macro] [label]
     |    macro causing subcount break point
-  %TC:group [name] [param.states] [content-state]
-    |    begin-end-group handling rule
+  %TC:envir [name] [param.states] [content-state]
+    |    \begin-\end environment handling rule (alternative: %TC:group)
   %TC:floatinclude [macro] [param.states]
     |    as macro, but also counted inside floats
   %TC:preambleinclude [macro] [param.states]
@@ -3788,6 +3953,7 @@
   %TC:incbib                include bibliography (same as running with -incbib)
   %TC:ignore                ignore region, end with %TC:endignore
   %TC:insert [code]         insert code for TeXcount to process as TeX code
+  %TC:subst [from] [to]     replace string thoughout document
   %TC:newtemplate           start a new template, ie delete the existing one
   %TC:template [template]   add another line to the template specification
 See the documentation for more details.



More information about the tex-live-commits mailing list