[latex3-commits] [git/LaTeX3-latex3-latex3] master: Improve benchmarking quality; add tests (817114b)

Tue May 8 22:44:37 CEST 2018

Repository : https://github.com/latex3/latex3
On branch  : master
Link       : https://github.com/latex3/latex3/commit/817114b63f40bef649cb4b1dc2208c0bb63c7265

>---------------------------------------------------------------

commit 817114b63f40bef649cb4b1dc2208c0bb63c7265
Author: Bruno Le Floch <bruno at le-floch.fr>
Date:   Tue May 8 16:44:37 2018 -0400

    Improve benchmarking quality; add tests
    
    Rather than always repeating the code 4^n times for some n, repeat it a
    "more continuous" number of times.  This avoids strange jumps in timing
    with the earlier code when code passed from one 4^n to the next.
    
    The tests check that benchmarking code does not give completely crazy
    results.  Hopefully it's lax enough to pass on Travis.


>---------------------------------------------------------------

817114b63f40bef649cb4b1dc2208c0bb63c7265
 l3trial/l3benchmark/l3benchmark.dtx                |  254 +++++++++++---------
 l3trial/l3benchmark/testfiles/m3benchmark000.tlg   |    4 +-
 .../l3benchmark/testfiles/m3benchmark000.xetex.tlg |    1 -
 l3trial/l3benchmark/testfiles/m3benchmark001.lvt   |   63 +++++
 .../l3benchmark/testfiles/m3benchmark001.tlg       |   13 +-
 5 files changed, 218 insertions(+), 117 deletions(-)

diff --git a/l3trial/l3benchmark/l3benchmark.dtx b/l3trial/l3benchmark/l3benchmark.dtx
index 362b0aa..8948c5d 100644
--- a/l3trial/l3benchmark/l3benchmark.dtx
+++ b/l3trial/l3benchmark/l3benchmark.dtx
@@ -64,7 +64,14 @@
 %
 % \begin{documentation}
 %
-% \section{Benchmark}
+% \section{Additions to \pkg{l3sys}: elapsed time}
+%
+% \begin{function}{\sys_timer_gzero:}
+%   \begin{syntax}
+%     \cs{sys_timer_gzero:}
+%   \end{syntax}
+%   Resets the timer to zero.
+% \end{function}
 %
 % \begin{function}{\sys_timer_apply:N}
 %   \begin{syntax}
@@ -77,11 +84,24 @@
 %   overflow after about $9$ hours in some engines.
 % \end{function}
 %
-% \begin{function}{\sys_timer_gzero:}
+% \section{Benchmark}
+%
+% \begin{function}{\benchmark_display:n}
 %   \begin{syntax}
-%     \cs{sys_timer_gzero:}
+%     \cs{benchmark_display:n} \Arg{time}
 %   \end{syntax}
-%   Resets the timer to zero.
+%   Prints the \meta{time} (in seconds) to the terminal.  This function
+%   can be redefined.
+% \end{function}
+%
+% \begin{function}{\benchmark_once:n}
+%   \begin{syntax}
+%     \cs{benchmark_once:n} \Arg{code}
+%   \end{syntax}
+%   Measures the time taken by \TeX{} to run the \meta{code} once, and
+%   calls \cs{benchmark_display:n} \Arg{time} with the \meta{time} in
+%   seconds.  The code is run only once so the time may be quite
+%   inaccurate for fast code.
 % \end{function}
 %
 % \begin{function}{\benchmark:n}
@@ -89,32 +109,23 @@
 %     \cs{benchmark:n} \Arg{code}
 %   \end{syntax}
 %   Measures the time taken by \TeX{} to run the \meta{code}, and calls
-%   \cs{benchmark_display:n} with the time in seconds as its argument.
-%   By default, that display function prints the time to the terminal.
-%   The behaviour can be changed by redefining \cs{benchmark_display:n}.
-%   This function cannot be nested.  The \meta{code} is run many times
-%   in a row, and not within a group, thus it should not have
+%   \cs{benchmark_display:n} \Arg{time} with the \meta{time} in seconds.
+%   This function cannot be nested.  The \meta{code} may be run many
+%   times in a row, and not within a group, thus it should not have
 %   side-effects.
 % \end{function}
 %
-% \begin{variable}{\l_benchmark_duration_int}
-%   This variable controls the duration of the benchmark in scaled
-%   seconds. Namely, the benchmark will take a time roughly between half
-%   and twice \cs{l_benchmark_duration_int} scaled seconds.
-% \end{variable}
-%
 % \begin{function}{\benchmark_tic:,\benchmark_toc:}
 %   \begin{syntax}
-%     |\benchmark_tic:| \meta{slow code} |\benchmark_toc:|
+%     \cs{benchmark_tic:} \meta{slow code} \cs{benchmark_toc:}
 %   \end{syntax}
-%   When it is not possible to run \cs{benchmark:n} (e.g., the code
-%   is part of the execution of a package which cannot be looped)
-%   the tic/toc commands can be used instead to time between two points
-%   in the code.
-%   When executed, |\benchmark_tic:| will print a line to the terminal,
-%   and |\benchmark_toc:| will print a matching line with a time to indicate
-%   the duration between them in seconds.
-%   Note that these commands can be nested.
+%   When it is not possible to run \cs{benchmark:n} (e.g., the code is
+%   part of the execution of a package which cannot be looped) the
+%   tic/toc commands can be used instead to time between two points in
+%   the code.  When executed, \cs{benchmark_tic:} will print a line to the
+%   terminal, and \cs{benchmark_toc:} will print a matching line with a
+%   time to indicate the duration between them in seconds.  Note that
+%   these commands can be nested.
 % \end{function}
 %
 % \end{documentation}
@@ -295,16 +306,6 @@ end
 %<@@=benchmark>
 %    \end{macrocode}
 %
-% \begin{variable}{\l_benchmark_duration_int}
-%   The benchmark is constrained to take roughly
-%   (from half to twice) \cs{l_benchmark_duration_int} scaled
-%   seconds, unless one iteration of the code takes longer.
-%    \begin{macrocode}
-\int_new:N \l_benchmark_duration_int
-\int_set:Nn \l_benchmark_duration_int { 65536 }
-%    \end{macrocode}
-% \end{variable}
-%
 %    \begin{macrocode}
 \sys_if_timer_exist:F
   {
@@ -327,12 +328,21 @@ end
   }
 %    \end{macrocode}
 %
-% \begin{variable}{\l_@@_time_a_tl, \l_@@_time_b_tl}
-%   These two variables hold the time before and after running a piece
-%   of code, as a decimal number in seconds.
+% \begin{variable}{\l_@@_duration_int}
+%   The benchmark is constrained to take roughly (from half to twice)
+%   \cs{l_@@_duration_int} scaled seconds, unless one iteration of the
+%   code takes longer.
 %    \begin{macrocode}
-\tl_new:N \l_@@_time_a_tl
-\tl_new:N \l_@@_time_b_tl
+\int_new:N \l_@@_duration_int
+\int_set:Nn \l_@@_duration_int { 65536 }
+%    \end{macrocode}
+% \end{variable}
+% 
+% \begin{variable}{\l_@@_time_int}
+%   This variable holds the time before and after running a piece of
+%   code, as an integer in scaled seconds.
+%    \begin{macrocode}
+\int_new:N \l_@@_time_int
 %    \end{macrocode}
 % \end{variable}
 %
@@ -344,109 +354,137 @@ end
 %    \end{macrocode}
 % \end{variable}
 %
-% \begin{macro}[int]{\@@_raw:n}
-% \begin{macro}[int]{\@@_outer_do_nothing:}
-%   The time it took to perform a given piece of code is the difference
-%   between the decimal numbers stored in the token lists
-%   \cs{l_@@_time_a_tl} and \cs{l_@@_time_b_tl}.  The
-%   function \cs{@@_outer_do_nothing:} prevents the arbitrary
-%   user code from tampering with the rest of the time module.
+% \begin{variable}{\g_@@_nesting_int}
+% \begin{macro}{\@@_raw:n, \@@_raw_aux:N, \@@_raw_end:n}
+%   Store in \cs{l_@@_time_int} the time it took to perform a given
+%   piece of code, in scaled seconds.  We call \cs{sys_timer_apply:N} as
+%   close before and after the code as possible.  We store the
+%   intermediate result in a new integer when \cs{@@_raw:n} is
+%   nested.
 %    \begin{macrocode}
-\cs_new_protected:Npn \@@_raw:n
-  { \sys_timer_apply:N \@@_raw_aux:nn }
-\cs_new_protected:Npn \@@_raw_aux:nn #1#2
+\int_new:N \g_@@_nesting_int
+\cs_new_protected:Npn \@@_raw:n #1
   {
-    #2
-    \@@_outer_do_nothing:
-    \sys_timer_apply:N \@@_raw_auxii:nn {#1}
+    \int_gincr:N \g_@@_nesting_int
+    \exp_args:Nc \@@_raw_aux:N
+      { g_@@_ \int_use:N \g_@@_nesting_int _int }
+    \sys_timer_apply:N \@@_raw_aux:n
+    #1
+    \sys_timer_apply:N \@@_raw_end:n
   }
-\cs_new_protected:Npn \@@_raw_auxii:nn #1#2
+\cs_new_protected:Npn \@@_raw_aux:N #1
   {
-    \tl_set:Nn \l_@@_time_a_tl {#2}
-    \tl_set:Nn \l_@@_time_b_tl {#1}
+    \int_gzero_new:N #1
+    \cs_gset_protected:Npn \@@_raw_aux:n { \int_gset:Nn #1 }
+  }
+\cs_new_protected:Npn \@@_raw_end:n #1
+  {
+    \int_set:Nn \l_@@_time_int
+      {
+        #1 -
+        \int_use:c { g_@@_ \int_use:N \g_@@_nesting_int _int }
+      }
+    \int_gdecr:N \g_@@_nesting_int
   }
-\tex_outer:D \cs_set:Npn \@@_outer_do_nothing: { }
 %    \end{macrocode}
 % \end{macro}
-% \end{macro}
 %
-% \begin{macro}[int]{\@@_raw_replicate:nn}
+% \begin{macro}{\@@_raw_replicate:nn, \@@_tmp:w, \@@_raw_replicate_aux:n}
 %   Here, we wish to measure the time it takes for the piece of code |#2|
 %   to be run |#1| times. The \cs{prg_replicate:nn} is expanded before
 %   being fed to \cs{@@_raw:n} to avoid timing its expansion as well.
+%
+%   We subtract the time for another call to \cs{@@_tmp:w}, with the
+%   same arguments (to capture the time it takes to read the argument)
+%   but empty expansion.
+%
+%   If the number of copies required is large (here ${}>1024$), it may
+%   exhaust \TeX{}'s main memory. In that case, we replicate $1024$
+%   times less the code |\@@_replicate_kibi_fold:n {#1}|.  Of course the
+%   division by $1024$ rounds to an integer, so that step introduces a
+%   relative error of order $1/1000$, much less than many other sources
+%   of variability.
 %    \begin{macrocode}
-\cs_new_protected:Npn \@@_raw_replicate:nn #1#2
+\cs_new_eq:NN \@@_tmp:w ?
+\cs_new_protected:Npn \@@_raw_replicate:nn #1
+  {
+    \int_compare:nNnTF {#1} > { 1024 }
+      { \@@_raw_replicate_large:nn {#1} }
+      { \@@_raw_replicate_small:nn {#1} }
+  }
+\cs_new_protected:Npn \@@_raw_replicate_large:nn #1#2
   {
-    \exp_args:Nx \@@_raw:n
-      { \prg_replicate:nn {#1} { \exp_not:n {#2} } }
+    \exp_args:Nno \@@_raw_replicate:nn { #1 / 1024 }
+      { \@@_replicate_kibi_fold:n {#2} }
   }
+\cs_new_protected:Npn \@@_raw_replicate_small:nn #1#2
+  {
+    \cs_set:Npx \@@_tmp:w ##1##2 { \prg_replicate:nn {#1} {##1} }
+    \@@_raw:n { \@@_tmp:w {#2} { } }
+    \exp_args:No \@@_raw_replicate_aux:nn
+      { \int_use:N \l_@@_time_int } {#2}
+  }
+\cs_new_protected:Npn \@@_raw_replicate_aux:nn #1#2
+  {
+    \@@_raw:n { \@@_tmp:w { } {#2} }
+    \int_set:Nn \l_@@_time_int { #1 - \l_@@_time_int }
+  }
+\cs_new:Npx \@@_replicate_kibi_fold:n #1
+  { \prg_replicate:nn {1024} {#1} }
 %    \end{macrocode}
 % \end{macro}
 %
-% \begin{macro}[EXP,int]{\@@_replicate_kibi_fold:n}
-%   As its name indicates, expanding this function once yields
-%   $1024$ copies of its argument. This is used to avoid reaching
-%   the limits of \TeX{}'s memory in case the user input is a very
-%   simple piece of code.
+% \begin{macro}{\benchmark_once:n}
+%   Convert from scaled seconds to seconds.
 %    \begin{macrocode}
-\cs_new:Npx \@@_replicate_kibi_fold:n #1
-  { \prg_replicate:nn {1024} {#1} }
+\cs_new_protected:Npn \benchmark_once:n #1
+  {
+    \@@_raw:n {#1}
+    \benchmark_display:f { \fp_to_tl:n { \l_@@_time_int / 65536 } }
+  }
 %    \end{macrocode}
 % \end{macro}
 %
 % \begin{macro}{\benchmark:n}
-%   The main timing function. First time the user code once.
-%   If that took less than half a second, repeat the measurement
-%   with $4$ copies of the code. Continue quadrupling the number
-%   of copies until reaching a time greater than half a second.
-%   If the number of copies required is large (here ${}>1024$),
-%   it may exhaust \TeX{}'s main memory. In that case, we
-%   replicate $1024$ times less the code
-%   |\@@_replicate_kibi_fold:n {#1}|.
-%   Once a large enough time was measured, divide that by $65536$
-%   and by the number of repetitions.
-%   The function \cs{benchmark_display:n} can be redefined by the user
-%   if the output should be done differently than to the terminal.
+%   The main timing function. First time the user code once.  If that
+%   took more than half a second we're done.  If that took much less
+%   than a second, quadruple the number of copies until it takes a
+%   reasonable amount of time (this is to avoid division by
+%   \cs{l_@@_time_int} when that is zero or too small).  Finally if it
+%   took between one hundredth of a second and half a second compute a
+%   number of times that can fit in one second (minus the time we
+%   already spent) and measure that.  Once a large enough time was
+%   measured, divide that by $65536$ and by the number of repetitions.
 %    \begin{macrocode}
 \cs_new_protected:Npn \benchmark:n #1
   {
     \int_set:Nn \l_@@_repeat_int { 1 }
     \@@_raw:n {#1}
-    \bool_while_do:nn
+    \int_compare:nNnT { \l_@@_time_int } > { \l_@@_duration_int / 2 }
+      { \prg_break: }
+    \int_while_do:nNnn
+      { \l_@@_time_int } < { \l_@@_duration_int / 100 }
       {
-        \int_compare_p:nNn
-          { \l_@@_time_b_tl - \l_@@_time_a_tl }
-          < { \l_benchmark_duration_int / 2 }
-      }
-      {
-        \int_set:Nn \l_@@_repeat_int { 4 * \l_@@_repeat_int }
-        \int_compare:nNnTF \l_@@_repeat_int > { 1024 }
+        \int_compare:nNnT \l_@@_repeat_int > { \c_max_int / 4 }
           {
-            \int_compare:nNnTF \l_@@_repeat_int > { 1048576 }
-              {
-                \@@_raw_replicate:nn
-                  { \l_@@_repeat_int / 1048576 }
-                  {
-                    \@@_replicate_kibi_fold:n
-                      { \@@_replicate_kibi_fold:n {#1} }
-                  }
-              }
-              {
-                \@@_raw_replicate:nn { \l_@@_repeat_int / 1024 }
-                  { \@@_replicate_kibi_fold:n {#1} }
-              }
+            \int_set:Nn \l_@@_time_int { 0 }
+            \prg_break:
           }
-          { \@@_raw_replicate:nn { \l_@@_repeat_int } {#1} }
+        \int_set:Nn \l_@@_repeat_int { 4 * \l_@@_repeat_int }
+        \@@_raw_replicate:nn { \l_@@_repeat_int } {#1}
       }
-    \benchmark_display:f
+    \int_set:Nn \l_@@_repeat_int
       {
-        \fp_to_tl:n
-          {
-            ( \l_@@_time_b_tl - \l_@@_time_a_tl )
-            / \l_@@_repeat_int / 65536
-          }
+        \l_@@_duration_int * \l_@@_repeat_int / \l_@@_time_int
+        - \l_@@_repeat_int * 5 / 4 - 1
       }
+    \@@_raw_replicate:nn { \l_@@_repeat_int } {#1}
+    \prg_break_point:
+    \fp_set:Nn \l_@@_time_fp
+      { \l_@@_time_int / \l_@@_repeat_int / 65536 }
+    \benchmark_display:f { \fp_to_tl:N \l_@@_time_fp }
   }
+\fp_new:N \l_@@_time_fp
 %    \end{macrocode}
 % \end{macro}
 %
diff --git a/l3trial/l3benchmark/testfiles/m3benchmark000.tlg b/l3trial/l3benchmark/testfiles/m3benchmark000.tlg
index c58ece3..66275be 100644
--- a/l3trial/l3benchmark/testfiles/m3benchmark000.tlg
+++ b/l3trial/l3benchmark/testfiles/m3benchmark000.tlg
@@ -2,7 +2,9 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
 (l3benchmark.sty
 Package: l3benchmark YYYY-MM-DD L3 Experimental benchmarking
-\l_benchmark_duration_int=\count...
+\l__benchmark_duration_int=\count...
+\l__benchmark_time_int=\count...
 \l__benchmark_repeat_int=\count...
+\g__benchmark_nesting_int=\count...
 \g__benchmark_tictoc_int=\count...
 )
diff --git a/l3trial/l3benchmark/testfiles/m3benchmark000.xetex.tlg b/l3trial/l3benchmark/testfiles/m3benchmark000.xetex.tlg
index f9d485e..8dbec9c 100644
--- a/l3trial/l3benchmark/testfiles/m3benchmark000.xetex.tlg
+++ b/l3trial/l3benchmark/testfiles/m3benchmark000.xetex.tlg
@@ -2,7 +2,6 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
 (l3benchmark.sty
 Package: l3benchmark YYYY-MM-DD L3 Experimental benchmarking
-\l_benchmark_duration_int=\count...
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
 ! Critical benchmark error: "no-time"
diff --git a/l3trial/l3benchmark/testfiles/m3benchmark001.lvt b/l3trial/l3benchmark/testfiles/m3benchmark001.lvt
new file mode 100644
index 0000000..772c228
--- /dev/null
+++ b/l3trial/l3benchmark/testfiles/m3benchmark001.lvt
@@ -0,0 +1,63 @@
+%
+% Copyright (C) 2018 The LaTeX3 Project
+%
+
+\documentclass{minimal}
+\input{regression-test}
+
+\RequirePackage[enable-debug]{expl3}
+\RequirePackage{l3benchmark}
+\ExplSyntaxOn
+\debug_on:n { check-declarations , deprecation , log-functions }
+\ExplSyntaxOff
+
+\START
+\ExplSyntaxOn
+
+\int_set:Nn \l__benchmark_duration_int { 65536 / 10 }
+
+\OMIT
+\exp_args:NNx \seq_set_from_clist:Nn \l_tmpa_seq
+  { \prg_replicate:nn { 123 } { a , } }
+\cs_new_protected:Npn \test_slow_code:
+  { \seq_reverse:N \l_tmpa_seq }
+\TIMO
+
+\TEST { benchmark_once }
+  {
+    \int_zero:N \l_tmpa_int
+    \cs_gset_protected:Npn \benchmark_display:n { \fp_set:Nn \l_tmpa_fp }
+    \benchmark_once:n { \int_incr:N \l_tmpa_int }
+    \int_log:N \l_tmpa_int    % Check the code was done only once
+    \fp_compare:nTF { \l_tmpa_fp < 2e-5 }  % Check it took 0 or 1 scaled seconds
+      { \TRUE }
+      { \ERROR \fp_show:N \l_tmpa_fp }
+  }
+
+\TEST { benchmark }
+  {
+    \clist_clear:N \l_tmpa_clist
+    \clist_clear:N \l_tmpb_clist
+    \prg_replicate:nn { 3 }
+      {
+        \cs_gset_protected:Npn \benchmark_display:n
+          { \clist_put_right:Nn \l_tmpa_clist }
+        \benchmark:n { \test_slow_code: }
+        \cs_gset_protected:Npn \benchmark_display:n
+          { \clist_put_right:Nn \l_tmpb_clist }
+        \benchmark:n { \test_slow_code: \test_slow_code: }
+      }
+    \fp_compare:nTF % Check that doing code twice takes between 1.5 and 2.5 times as much
+      {
+        (1.5 * min ( \l_tmpa_clist ) < max ( \l_tmpb_clist ))
+        && (2.5 * max ( \l_tmpa_clist ) > min ( \l_tmpb_clist ))
+      }
+      { \TRUE }
+      {
+        \ERROR
+        \TYPE { 1:\l_tmpa_clist }
+        \TYPE { 2:\l_tmpb_clist }
+      }
+  }
+
+\END
diff --git a/l3kernel/testfiles/m3str002.ptex.tlg b/l3trial/l3benchmark/testfiles/m3benchmark001.tlg
similarity index 75%
copy from l3kernel/testfiles/m3str002.ptex.tlg
copy to l3trial/l3benchmark/testfiles/m3benchmark001.tlg
index fccc5ed..1904c90 100644
--- a/l3kernel/testfiles/m3str002.ptex.tlg
+++ b/l3trial/l3benchmark/testfiles/m3benchmark001.tlg
@@ -1,16 +1,15 @@
 This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
-Author: Joseph Wright
 ============================================================
-TEST 1: Simple Latin case folding
+TEST 1: benchmark_once
 ============================================================
-"abc 123 abc !@"
-" abc 123 abc !@ "
-"some $&## odd text { } "
+Defining \g__benchmark_1_int on line ...
+\g__benchmark_1_int=\count...
+> \l_tmpa_int=1.
+TRUE
 ============================================================
 ============================================================
-TEST 2: Checking category codes
+TEST 2: benchmark
 ============================================================
-FALSE
 TRUE
 ============================================================