texlive[75017] Master/texmf-dist: expltools (25apr25)
commits+karl at tug.org
Fri Apr 25 23:14:22 CEST 2025
Revision: 75017
https://tug.org/svn/texlive?view=revision&revision=75017
Author: karl
Date: 2025-04-25 23:14:22 +0200 (Fri, 25 Apr 2025)
Log Message:
-----------
expltools (25apr25)
Modified Paths:
--------------
trunk/Master/texmf-dist/doc/support/expltools/CHANGES.md
trunk/Master/texmf-dist/doc/support/expltools/README.md
trunk/Master/texmf-dist/doc/support/expltools/project-proposal.pdf
trunk/Master/texmf-dist/doc/support/expltools/s206-03.tex
trunk/Master/texmf-dist/doc/support/expltools/warnings-and-errors-03-syntactic-analysis.md
trunk/Master/texmf-dist/doc/support/expltools/warnings-and-errors.pdf
trunk/Master/texmf-dist/scripts/expltools/explcheck-cli.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-config.toml
trunk/Master/texmf-dist/scripts/expltools/explcheck-evaluation.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-format.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-lexical-analysis.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-obsolete.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-parsers.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-ranges.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-syntactic-analysis.lua
trunk/Master/texmf-dist/scripts/expltools/explcheck-utils.lua
Added Paths:
-----------
trunk/Master/texmf-dist/doc/support/expltools/e304-01.tex
trunk/Master/texmf-dist/doc/support/expltools/e304-02.tex
trunk/Master/texmf-dist/scripts/expltools/explcheck-semantic-analysis.lua
Modified: trunk/Master/texmf-dist/doc/support/expltools/CHANGES.md
===================================================================
--- trunk/Master/texmf-dist/doc/support/expltools/CHANGES.md 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/doc/support/expltools/CHANGES.md 2025-04-25 21:14:22 UTC (rev 75017)
@@ -1,5 +1,39 @@
# Changes
+## expltools 2025-04-25
+
+### explcheck v0.9.0
+
+#### Development
+
+- Add basic support for semantic analysis and reading (nested) function
+ definitions. (#75)
+
+ None of the issues from Section 4 of the document titled [_Warnings and errors
+ for the expl3 analysis tool_][warnings-and-errors] are recognized by
+ explcheck yet. Support for (some of) these issues will be added in the next
+ minor release.
+
+ [warnings-and-errors]: https://github.com/witiko/expltools/releases/download/latest/warnings-and-errors.pdf
+
+- Add error E304 (Unexpected parameter number) for incorrect parameter tokens
+ in parameter and replacement texts of function definitions. (#75)
+
+#### Fixes
+
+- Exclude global scratch variables from issue S206 (Malformed variable or
+ constant name). (reported by @fpantigny in #76, fixed in #77)
+
+- Do not produce warning S204 (Missing stylistic whitespaces) in Lua code.
+ (reported by @zepinglee in #29, fixed in #75)
+
+#### Documentation
+
+- Add a link to [a work-in-progress TUG 2025 paper][expltools-tug25-paper] to
+ `README.md`. (8d4177b, 99ef3b9)
+
+ [expltools-tug25-paper]: https://github.com/witiko/expltools-tug25-paper
+
## expltools 2025-04-01
### explcheck v0.8.1
Modified: trunk/Master/texmf-dist/doc/support/expltools/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/support/expltools/README.md 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/doc/support/expltools/README.md 2025-04-25 21:14:22 UTC (rev 75017)
@@ -17,6 +17,8 @@
7. [Lexical analysis and a public website listing issues in current TeX Live][12] from February 24, 2025
8. [Syntactic analysis][13] from March 27, 2025
+The article [Expltools: Development tools for expl3 programmers][14], forthcoming in TUGboat 46(2), is a work in progress that summarizes the devlog posts and provides a coherent overview of the current state of the tool.
+
In the future, this repository may also contain the code of other useful development tools for expl3 programmers, such as a command-line utility similar to `grep` that will ignore whitespaces and newlines, as well as other tools.
[1]: https://witiko.github.io/Expl3-Linter-1/
@@ -32,6 +34,7 @@
[11]: https://koppor.github.io/explcheck-issues/
[12]: https://witiko.github.io/Expl3-Linter-7/
[13]: https://witiko.github.io/Expl3-Linter-8/
+ [14]: https://github.com/Witiko/expltools-tug25-paper
## Usage
@@ -59,6 +62,7 @@
local preprocessing = require("explcheck-preprocessing")
local lexical_analysis = require("explcheck-lexical-analysis")
local syntactic_analysis = require("explcheck-syntactic-analysis")
+local semantic_analysis = require("explcheck-semantic-analysis")
-- Process file "code.tex" and print warnings and errors.
local filename = "code.tex"
@@ -72,6 +76,7 @@
preprocessing.process(filename, content, issues, results)
lexical_analysis.process(filename, content, issues, results)
syntactic_analysis.process(filename, content, issues, results)
+semantic_analysis.process(filename, content, issues, results)
print(
"There were " .. #issues.warnings .. " warnings, "
Added: trunk/Master/texmf-dist/doc/support/expltools/e304-01.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/expltools/e304-01.tex (rev 0)
+++ trunk/Master/texmf-dist/doc/support/expltools/e304-01.tex 2025-04-25 21:14:22 UTC (rev 75017)
@@ -0,0 +1,4 @@
+\cs_new:Npn
+ \example_foo:nnn
+ #1#2#9 % error on this line
+ { foo~#1 }
Property changes on: trunk/Master/texmf-dist/doc/support/expltools/e304-01.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/support/expltools/e304-02.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/expltools/e304-02.tex (rev 0)
+++ trunk/Master/texmf-dist/doc/support/expltools/e304-02.tex 2025-04-25 21:14:22 UTC (rev 75017)
@@ -0,0 +1,4 @@
+\cs_new:Npn
+ \example_foo:nnn
+ #1#2#3
+ { foo~#4 } % error on this line
Property changes on: trunk/Master/texmf-dist/doc/support/expltools/e304-02.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
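Both examples exercise the same rule: a parameter token `#k` is only valid
when k is between 1 and the number of parameters that the defined function
declares. A sketch of that check with a hypothetical helper (the actual
implementation works on parsed tokens in explcheck-semantic-analysis.lua):

    -- Hypothetical helper illustrating the E304 rule.
    local function parameter_number_ok(k, num_parameters)
      return k >= 1 and k <= num_parameters
    end

    assert(parameter_number_ok(2, 3))      -- #2 with signature :nnn is fine
    assert(not parameter_number_ok(9, 3))  -- e304-01.tex: #9, but only 3 parameters
    assert(not parameter_number_ok(4, 3))  -- e304-02.tex: #4 in the replacement text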
Modified: trunk/Master/texmf-dist/doc/support/expltools/project-proposal.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/support/expltools/s206-03.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/expltools/s206-03.tex 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/doc/support/expltools/s206-03.tex 2025-04-25 21:14:22 UTC (rev 75017)
@@ -1,6 +1,9 @@
\tl_use:N
\l_tmpa_tl
-\int_use:N
- \l_tmpb_int
-\str_use:N
- \l_tmpa_str
+\int_gset:Nn
+ \g_tmpb_int
+ { 1 + 2 }
+\str_show:N
+ \g_tmpa_str
+\bool_set_true:N
+ \l_tmpa_bool
Modified: trunk/Master/texmf-dist/doc/support/expltools/warnings-and-errors-03-syntactic-analysis.md
===================================================================
--- trunk/Master/texmf-dist/doc/support/expltools/warnings-and-errors-03-syntactic-analysis.md 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/doc/support/expltools/warnings-and-errors-03-syntactic-analysis.md 2025-04-25 21:14:22 UTC (rev 75017)
@@ -26,6 +26,12 @@
## Braced N-type function call argument {.w label=w303}
An N-type function call argument is braced:
- /w302.tex
+ /w303.tex
Depending on the specific function, this may or may not be an error.
+
+# Unexpected parameter number {.e label=e304}
+A parameter or replacement text contains parameter tokens (`#`) followed by unexpected numbers:
+
+ /e304-01.tex
+ /e304-02.tex
Modified: trunk/Master/texmf-dist/doc/support/expltools/warnings-and-errors.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-cli.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-cli.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-cli.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -12,7 +12,7 @@
local preprocessing = require("explcheck-preprocessing")
local lexical_analysis = require("explcheck-lexical-analysis")
local syntactic_analysis = require("explcheck-syntactic-analysis")
--- local semantic_analysis = require("explcheck-semantic-analysis")
+local semantic_analysis = require("explcheck-semantic-analysis")
-- local pseudo_flow_analysis = require("explcheck-pseudo-flow-analysis")
-- Deduplicate pathnames.
@@ -90,8 +90,9 @@
assert(file:close())
-- Run all steps.
+ local steps = {preprocessing, lexical_analysis, syntactic_analysis, semantic_analysis}
local analysis_results = {}
- for _, step in ipairs({preprocessing, lexical_analysis, syntactic_analysis}) do
+ for _, step in ipairs(steps) do
step.process(pathname, content, issues, analysis_results, options)
-- If a processing step ended with error, skip all following steps.
if #issues.errors > 0 then
@@ -162,7 +163,7 @@
end
local function print_version()
- print("explcheck (expltools 2025-04-01) v0.8.1")
+ print("explcheck (expltools 2025-04-25) v0.9.0")
print("Copyright (c) 2024-2025 Vít Starý Novotný")
print("Licenses: LPPL 1.3 or later, GNU GPL v2 or later")
end
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-config.toml
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-config.toml 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-config.toml 2025-04-25 21:14:22 UTC (rev 75017)
@@ -11,7 +11,7 @@
[package.l3kernel]
expl3_detection_strategy = "always"
-ignored_issues = ["e208", "e209", "e300", "e301", "w302"]
+ignored_issues = ["e208", "e209", "e300", "e301", "w302", "e304"]
max_line_length = 140
[filename."tex4ht.sty"]
@@ -75,6 +75,7 @@
[package.ctex]
expl3_detection_strategy = "always"
max_line_length = 100
+ignored_issues = ["e304"]
[filename."knowledge.sty"]
ignored_issues = ["e201", "e300", "e301", "w302"]
@@ -82,9 +83,10 @@
[filename."grading-scheme.sty"]
ignored_issues = ["e201"]
-[filename."scontents.tex"]
+[package.scontents]
expl3_detection_strategy = "always"
max_line_length = 90
+ignored_issues = ["e300"]
[package.stex]
expl3_detection_strategy = "always"
@@ -111,7 +113,7 @@
[filename."unravel.sty"]
expl3_detection_strategy = "always"
max_line_length = 100
-ignored_issues = ["e209"]
+ignored_issues = ["e209", "e300"]
[filename."polexprcore.tex"]
expl3_detection_strategy = "precision"
@@ -250,7 +252,7 @@
ignored_issues = ["e301", "w302"]
[package.leadsheets]
-ignored_issues = ["e301"]
+ignored_issues = ["e300", "e301"]
[package.mathcommand]
ignored_issues = ["e301"]
@@ -272,3 +274,18 @@
[filename."ecgdraw.sty"]
ignored_issues = ["e201"]
+
+[filename."xassoccnt.sty"]
+ignored_issues = ["e304"]
+
+[filename."skdoc.cls"]
+ignored_issues = ["e304"]
+
+[package."koma-script"]
+ignored_issues = ["e304"]
+
+[package."duckuments"]
+ignored_issues = ["e304"]
+
+[package.xduts]
+ignored_issues = ["e300"]
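Packages that intentionally use nonstandard parameter tokens can opt out of
the new error the same way in their own configuration; a sketch with a
hypothetical filename:

    [filename."mypackage.sty"]
    ignored_issues = ["e304"]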
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-evaluation.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-evaluation.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-evaluation.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -1,9 +1,12 @@
-- Evaluation of the analysis results, both for individual files and in aggregate.
-local call_types = require("explcheck-syntactic-analysis").call_types
+local token_types = require("explcheck-lexical-analysis").token_types
+local statement_types = require("explcheck-semantic-analysis").statement_types
-local CALL = call_types.CALL
+local ARGUMENT = token_types.ARGUMENT
+local FUNCTION_DEFINITION = statement_types.FUNCTION_DEFINITION
+
local FileEvaluationResults = {}
local AggregateEvaluationResults = {}
@@ -31,7 +34,10 @@
if analysis_results.tokens ~= nil then
num_tokens = 0
for _, part_tokens in ipairs(analysis_results.tokens) do
- num_tokens = num_tokens + #part_tokens
+ for _, token in ipairs(part_tokens) do
+ assert(token[1] ~= ARGUMENT)
+ num_tokens = num_tokens + 1
+ end
end
end
local num_groupings, num_unclosed_groupings
@@ -48,18 +54,99 @@
end
-- Evaluate the results of the syntactic analysis.
local num_calls, num_call_tokens
+ local num_calls_total
if analysis_results.calls ~= nil then
- num_calls, num_call_tokens = 0, 0
+ num_calls, num_call_tokens = {}, {}
+ num_calls_total = 0
for _, part_calls in ipairs(analysis_results.calls) do
for _, call in ipairs(part_calls) do
local call_type, call_tokens, _, _ = table.unpack(call)
- if call_type == CALL then
- num_calls = num_calls + 1
- num_call_tokens = num_call_tokens + #call_tokens
+ if num_calls[call_type] == nil then
+ assert(num_call_tokens[call_type] == nil)
+ num_calls[call_type] = 0
+ num_call_tokens[call_type] = 0
end
+ num_calls[call_type] = num_calls[call_type] + 1
+ num_call_tokens[call_type] = num_call_tokens[call_type] + #call_tokens
+ num_calls_total = num_calls_total + 1
end
end
end
+ local num_replacement_text_calls, num_replacement_text_call_tokens
+ local num_replacement_text_calls_total
+ if analysis_results.replacement_texts ~= nil then
+ num_replacement_text_calls, num_replacement_text_call_tokens = {}, {}
+ num_replacement_text_calls_total = 0
+ for _, part_replacement_texts in ipairs(analysis_results.replacement_texts) do
+ for _, replacement_text_calls in ipairs(part_replacement_texts.calls) do
+ for _, call in pairs(replacement_text_calls) do
+ local call_type, call_tokens, _, _ = table.unpack(call)
+ if num_replacement_text_calls[call_type] == nil then
+ assert(num_replacement_text_call_tokens[call_type] == nil)
+ num_replacement_text_calls[call_type] = 0
+ num_replacement_text_call_tokens[call_type] = 0
+ end
+ num_replacement_text_calls[call_type] = num_replacement_text_calls[call_type] + 1
+ num_replacement_text_call_tokens[call_type] = num_replacement_text_call_tokens[call_type] + #call_tokens
+ num_replacement_text_calls_total = num_replacement_text_calls_total + 1
+ end
+ end
+ end
+ end
+ -- Evaluate the results of the semantic analysis.
+ local num_statements, num_statement_tokens
+ local num_statements_total
+ if analysis_results.statements ~= nil then
+ num_statements, num_statement_tokens = {}, {}
+ num_statements_total = 0
+ for part_number, part_statements in ipairs(analysis_results.statements) do
+ local part_calls = analysis_results.calls[part_number]
+ for statement_number, statement in ipairs(part_statements) do
+ local statement_type = table.unpack(statement)
+ local _, call_tokens = table.unpack(part_calls[statement_number])
+ if num_statements[statement_type] == nil then
+ assert(num_statement_tokens[statement_type] == nil)
+ num_statements[statement_type] = 0
+ num_statement_tokens[statement_type] = 0
+ end
+ num_statements[statement_type] = num_statements[statement_type] + 1
+ num_statement_tokens[statement_type] = num_statement_tokens[statement_type] + #call_tokens
+ num_statements_total = num_statements_total + 1
+ end
+ end
+ end
+ local num_replacement_text_statements, num_replacement_text_statement_tokens
+ local num_replacement_text_statements_total, replacement_text_max_nesting_depth
+ if analysis_results.replacement_texts ~= nil then
+ num_replacement_text_statements, num_replacement_text_statement_tokens = {}, {}
+ num_replacement_text_statements_total = 0
+ replacement_text_max_nesting_depth = {}
+
+ for _, part_replacement_texts in ipairs(analysis_results.replacement_texts) do
+ for replacement_text_number, replacement_text_statements in ipairs(part_replacement_texts.statements) do
+ local nesting_depth = part_replacement_texts.nesting_depth[replacement_text_number]
+ for statement_number, statement in pairs(replacement_text_statements) do
+ local statement_type = table.unpack(statement)
+ local _, call_tokens = table.unpack(part_replacement_texts.calls[replacement_text_number][statement_number])
+ if num_replacement_text_statements[statement_type] == nil then
+ assert(num_replacement_text_statement_tokens[statement_type] == nil)
+ num_replacement_text_statements[statement_type] = 0
+ num_replacement_text_statement_tokens[statement_type] = 0
+ replacement_text_max_nesting_depth[statement_type] = 0
+ end
+ num_replacement_text_statements[statement_type] = num_replacement_text_statements[statement_type] + 1
+ if statement_type ~= FUNCTION_DEFINITION or nesting_depth == 1 then
+ -- prevent counting overlapping tokens from nested function definitions several times
+ num_replacement_text_statement_tokens[statement_type]
+ = num_replacement_text_statement_tokens[statement_type] + #call_tokens
+ end
+ num_replacement_text_statements_total = num_replacement_text_statements_total + 1
+ replacement_text_max_nesting_depth[statement_type]
+ = math.max(replacement_text_max_nesting_depth[statement_type], nesting_depth)
+ end
+ end
+ end
+ end
-- Initialize the class.
self.num_total_bytes = num_total_bytes
self.num_warnings = num_warnings
@@ -70,6 +157,17 @@
self.num_unclosed_groupings = num_unclosed_groupings
self.num_calls = num_calls
self.num_call_tokens = num_call_tokens
+ self.num_calls_total = num_calls_total
+ self.num_replacement_text_calls = num_replacement_text_calls
+ self.num_replacement_text_call_tokens = num_replacement_text_call_tokens
+ self.num_replacement_text_calls_total = num_replacement_text_calls_total
+ self.num_statements = num_statements
+ self.num_statement_tokens = num_statement_tokens
+ self.num_statements_total = num_statements_total
+ self.num_replacement_text_statements = num_replacement_text_statements
+ self.num_replacement_text_statement_tokens = num_replacement_text_statement_tokens
+ self.num_replacement_text_statements_total = num_replacement_text_statements_total
+ self.replacement_text_max_nesting_depth = replacement_text_max_nesting_depth
return self
end
@@ -88,19 +186,49 @@
self.num_tokens = 0
self.num_groupings = 0
self.num_unclosed_groupings = 0
- self.num_calls = 0
- self.num_call_tokens = 0
+ self.num_calls = {}
+ self.num_call_tokens = {}
+ self.num_calls_total = 0
+ self.num_replacement_text_calls = {}
+ self.num_replacement_text_call_tokens = {}
+ self.num_replacement_text_calls_total = 0
+ self.num_statements = {}
+ self.num_statement_tokens = {}
+ self.num_statements_total = 0
+ self.num_replacement_text_statements = {}
+ self.num_replacement_text_statement_tokens = {}
+ self.num_replacement_text_statements_total = 0
+ self.replacement_text_max_nesting_depth = {_how = math.max}
return self
end
-- Add evaluation results of an individual file to the aggregate.
function AggregateEvaluationResults:add(evaluation_results)
- self.num_files = self.num_files + 1
- for key, _ in pairs(self) do
- if key ~= "num_files" and evaluation_results[key] ~= nil then
- self[key] = self[key] + evaluation_results[key]
+ local function aggregate_table(self_table, evaluation_result_table)
+ for key, value in pairs(evaluation_result_table) do
+ if type(value) == "number" then -- a simple count
+ if self_table[key] == nil then
+ self_table[key] = 0
+ end
+ assert(key ~= "_how")
+ if self_table._how ~= nil then
+ self_table[key] = self_table._how(self_table[key], value)
+ else
+ self_table[key] = self_table[key] + value
+ end
+ elseif type(value) == "table" then -- a table of counts
+ if self_table[key] == nil then
+ self_table[key] = {}
+ end
+ aggregate_table(self_table[key], value)
+ else
+ error('Unexpected field type "' .. type(value) .. '"')
+ end
end
end
+
+ self.num_files = self.num_files + 1
+ aggregate_table(self, evaluation_results)
end
return {
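The recursive aggregation above replaces the previous flat sum over numeric
fields. A standalone sketch of the behaviour (not the commit's code, which
additionally keys counts by call and statement types): plain numbers are
summed by default, while a table carrying a `_how` function, such as math.max
for nesting depths, combines its values with that function instead.

    local function aggregate(totals, results)
      for key, value in pairs(results) do
        if type(value) == "number" then
          -- sum by default; use the enclosing table's `_how` function when present
          local combine = totals._how or function(a, b) return a + b end
          totals[key] = combine(totals[key] or 0, value)
        elseif type(value) == "table" then
          if totals[key] == nil then
            totals[key] = {}
          end
          aggregate(totals[key], value)
        end
      end
    end

    local totals = {max_nesting_depth = {_how = math.max}}
    aggregate(totals, {num_tokens = 100, max_nesting_depth = {["function definition"] = 2}})
    aggregate(totals, {num_tokens = 50, max_nesting_depth = {["function definition"] = 3}})
    assert(totals.num_tokens == 150)                              -- counts are summed
    assert(totals.max_nesting_depth["function definition"] == 3)  -- depths take the maximum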
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-format.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-format.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-format.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -1,8 +1,11 @@
-- Formatting for the command-line interface of the static analyzer explcheck.
+local statement_types = require("explcheck-semantic-analysis").statement_types
local get_option = require("explcheck-config")
local utils = require("explcheck-utils")
+local FUNCTION_DEFINITION = statement_types.FUNCTION_DEFINITION
+
local color_codes = {
BOLD = 1,
RED = 31,
@@ -15,15 +18,65 @@
local GREEN = color_codes.GREEN
local YELLOW = color_codes.YELLOW
--- Transform a singular into plural if the count is zero or greater than two.
+-- Get an iterator over the key-value pairs in a table, ordered by descending values.
+local function pairs_sorted_by_descending_values(obj)
+ local items = {}
+ for key, value in pairs(obj) do
+ table.insert(items, {key, value})
+ end
+ table.sort(items, function(first_item, second_item)
+ if first_item[2] > second_item[2] then
+ return true
+ elseif first_item[2] == second_item[2] and first_item[1] > second_item[1] then
+ return true
+ else
+ return false
+ end
+ end)
+ local i = 0
+ return function()
+ i = i + 1
+ if i <= #items then
+ return table.unpack(items[i])
+ else
+ return nil
+ end
+ end
+end
+
+-- Transform a singular into plural if the count is zero, greater than one, or unspecified.
local function pluralize(singular, count)
if count == 1 then
return singular
else
- return singular .. "s"
+ local of_index = singular:find(" of ")
+ local plural
+ if of_index == nil then
+ plural = singular .. "s"
+ else
+ plural = singular:sub(1, of_index - 1) .. "s" .. singular:sub(of_index)
+ end
+ return plural
end
end
+-- Add either a definite article or an indefinite/zero article, based on the count.
+local function add_article(text, count, definite, starts_with_a_vowel)
+ if definite then
+ return "the " .. text
+ else
+ if count == 1 or count == nil then
+ if starts_with_a_vowel then
+ return "an " .. text
+ else
+ return "a " .. text
+ end
+ else
+ return text
+ end
+ end
+end
+
-- Upper-case the initial letter of a word.
local function titlecase(word)
assert(#word > 0)
@@ -129,7 +182,7 @@
assert(denominator > 0)
local formatted_percentage = string.format("%.0f%%", 100.0 * numerator / denominator)
if numerator > 0 and formatted_percentage == "0%" then
- return ">0%"
+ return "<1%"
else
return formatted_percentage
end
@@ -162,6 +215,7 @@
-- Display additional information.
if verbose then
+ local line_indent = (" "):rep(4)
print()
io.write(string.format("\n%s", colorize("Aggregate statistics:", BOLD)))
-- Display pre-evaluation information.
@@ -190,20 +244,82 @@
end
end
-- Evaluate the evaluation results of the syntactic analysis.
- local num_calls = evaluation_results.num_calls
- local num_call_tokens = evaluation_results.num_call_tokens
- if num_calls == 0 then
- goto skip_remaining_additional_information
+ if evaluation_results.num_calls_total > 0 and evaluation_results.num_statements_total == 0 then
+ for call_type, num_call_tokens in pairs_sorted_by_descending_values(evaluation_results.num_call_tokens) do
+ local num_calls = evaluation_results.num_calls[call_type]
+ assert(num_calls > 0)
+ assert(num_call_tokens > 0)
+ io.write(string.format("\n- %s top-level %s spanning ", titlecase(humanize(num_calls)), pluralize(call_type, num_calls)))
+ if num_call_tokens == num_tokens then
+ io.write("all tokens")
+ else
+ io.write(string.format("%s %s ", humanize(num_call_tokens), pluralize("token", num_call_tokens)))
+ local formatted_token_ratio = format_ratio(num_call_tokens, num_tokens)
+ if num_expl_bytes == num_total_bytes then
+ io.write(string.format("(%s of total bytes)", formatted_token_ratio))
+ else
+ local formatted_byte_ratio = format_ratio(num_expl_bytes * num_call_tokens, num_total_bytes * num_tokens)
+ io.write(string.format("(%s of tokens, ~%s of total bytes)", formatted_token_ratio, formatted_byte_ratio))
+ end
+ end
+ end
end
- assert(num_call_tokens > 0)
- io.write(string.format("\n- %s top-level expl3 %s spanning ", titlecase(humanize(num_calls)), pluralize("call", num_calls)))
- if num_call_tokens == num_tokens then
- io.write("all tokens")
- else
- io.write(string.format("%s %s ", humanize(num_call_tokens), pluralize("token", num_call_tokens)))
- local formatted_token_ratio = format_ratio(num_call_tokens, num_tokens)
- local formatted_byte_ratio = format_ratio(num_expl_bytes * num_call_tokens, num_total_bytes * num_tokens)
- io.write(string.format("(%s of tokens, ~%s of total bytes)", formatted_token_ratio, formatted_byte_ratio))
+ -- Evaluate the evaluation results of the semantic analysis.
+ for statement_type, num_statement_tokens in pairs_sorted_by_descending_values(evaluation_results.num_statement_tokens) do
+ local num_statements = evaluation_results.num_statements[statement_type]
+ assert(num_statements > 0)
+ assert(num_statement_tokens > 0)
+ io.write(string.format("\n- %s top-level ", titlecase(humanize(num_statements))))
+ io.write(string.format("%s spanning ", pluralize(statement_type, num_statements)))
+ if num_statement_tokens == num_tokens then
+ io.write("all tokens")
+ else
+ local formatted_statement_tokens = string.format(
+ "%s %s", humanize(num_statement_tokens), pluralize("token", num_statement_tokens))
+ local formatted_token_ratio = format_ratio(num_statement_tokens, num_tokens)
+ if num_expl_bytes == num_total_bytes then
+ io.write(string.format("%s (%s of total bytes)", formatted_statement_tokens, formatted_token_ratio))
+ else
+ local formatted_byte_ratio = format_ratio(num_expl_bytes * num_statement_tokens, num_total_bytes * num_tokens)
+ io.write(string.format(
+ "%s (%s of tokens, ~%s of total bytes)", formatted_statement_tokens, formatted_token_ratio, formatted_byte_ratio))
+ end
+ end
+ if statement_type == FUNCTION_DEFINITION and evaluation_results.num_replacement_text_statements_total > 0 then
+ local seen_nested_function_definition = false
+ for nested_statement_type, num_nested_statement_tokens in
+ pairs_sorted_by_descending_values(evaluation_results.num_replacement_text_statement_tokens) do
+ local num_nested_statements = evaluation_results.num_replacement_text_statements[nested_statement_type]
+ local max_nesting_depth = evaluation_results.replacement_text_max_nesting_depth[nested_statement_type]
+ assert(num_nested_statements > 0)
+ assert(num_nested_statement_tokens > 0)
+ assert(max_nesting_depth > 0)
+ if nested_statement_type == FUNCTION_DEFINITION then
+ seen_nested_function_definition = true
+ end
+ io.write(string.format("\n%s- %s nested ", line_indent, titlecase(humanize(num_nested_statements))))
+ io.write(string.format("%s ", pluralize(nested_statement_type, num_nested_statements)))
+ if max_nesting_depth > 1 and nested_statement_type == FUNCTION_DEFINITION then
+ io.write(string.format("with a maximum nesting depth of %s, ", humanize(max_nesting_depth)))
+ end
+ io.write(string.format(
+ "spanning %s %s", humanize(num_nested_statement_tokens), pluralize("token", num_nested_statement_tokens)
+ ))
+ if max_nesting_depth > 1 and nested_statement_type ~= FUNCTION_DEFINITION then
+ local num_nested_function_definition_statements = evaluation_results.num_replacement_text_statements[FUNCTION_DEFINITION]
+ assert(num_nested_function_definition_statements > 0)
+ io.write(string.format(
+ ", some in %s",
+ add_article(
+ pluralize(string.format("nested %s", FUNCTION_DEFINITION), num_nested_function_definition_statements),
+ num_nested_function_definition_statements,
+ seen_nested_function_definition,
+ false
+ )
+ ))
+ end
+ end
+ end
end
end
@@ -439,9 +555,12 @@
end
end
-- Evaluate the evaluation results of the lexical analysis.
+ local num_tokens = evaluation_results.num_tokens
+ if num_tokens == nil then
+ goto skip_remaining_additional_information
+ end
io.write(string.format("\n\n%s%s", line_indent, colorize("Lexical analysis results:", BOLD)))
- local num_tokens = evaluation_results.num_tokens
- if num_tokens == 0 or num_tokens == nil then
+ if num_tokens == 0 then
io.write(string.format("\n%s- No tokens in expl3 parts", line_indent))
goto skip_remaining_additional_information
end
@@ -458,25 +577,111 @@
end
end
-- Evaluate the evaluation results of the syntactic analysis.
+ if evaluation_results.num_calls == nil then
+ goto skip_remaining_additional_information
+ end
io.write(string.format("\n\n%s%s", line_indent, colorize("Syntactic analysis results:", BOLD)))
- local num_calls = evaluation_results.num_calls
- local num_call_tokens = evaluation_results.num_call_tokens
- if num_calls == 0 or num_calls == nil then
- io.write(string.format("\n%s- No top-level expl3 calls", line_indent))
+ if evaluation_results.num_calls_total == 0 then
+ io.write(string.format("\n%s- No top-level %s", line_indent, pluralize("call")))
goto skip_remaining_additional_information
end
- assert(num_calls ~= nil)
- assert(num_calls > 0)
- io.write(string.format("\n%s- %s %s ", line_indent, titlecase(humanize(num_calls)), pluralize("top-level expl3 call", num_calls)))
- io.write("spanning ")
- if num_call_tokens == num_tokens then
- io.write("all tokens")
- else
- local formatted_call_tokens = string.format("%s %s", humanize(num_call_tokens), pluralize("token", num_call_tokens))
- local formatted_token_ratio = format_ratio(num_call_tokens, num_tokens)
- local formatted_byte_ratio = format_ratio(num_expl_bytes * num_call_tokens, num_total_bytes * num_tokens)
- io.write(string.format("%s (%s of tokens, ~%s of file size)", formatted_call_tokens, formatted_token_ratio, formatted_byte_ratio))
+ for call_type, num_call_tokens in pairs_sorted_by_descending_values(evaluation_results.num_call_tokens) do
+ local num_calls = evaluation_results.num_calls[call_type]
+ assert(num_calls ~= nil)
+ assert(num_calls > 0)
+ assert(num_call_tokens ~= nil)
+ assert(num_call_tokens > 0)
+ io.write(string.format("\n%s- %s top-level %s ", line_indent, titlecase(humanize(num_calls)), pluralize(call_type, num_calls)))
+ io.write("spanning ")
+ if num_call_tokens == num_tokens then
+ io.write("all tokens")
+ else
+ local formatted_call_tokens = string.format("%s %s", humanize(num_call_tokens), pluralize("token", num_call_tokens))
+ local formatted_token_ratio = format_ratio(num_call_tokens, num_tokens)
+ if num_expl_bytes == num_total_bytes then
+ io.write(string.format("%s (%s of file size)", formatted_call_tokens, formatted_token_ratio))
+ else
+ local formatted_byte_ratio = format_ratio(num_expl_bytes * num_call_tokens, num_total_bytes * num_tokens)
+ io.write(string.format("%s (%s of tokens, ~%s of file size)", formatted_call_tokens, formatted_token_ratio, formatted_byte_ratio))
+ end
+ end
end
+ if evaluation_results.num_calls_total == nil or evaluation_results.num_calls_total == 0 then
+ goto skip_remaining_additional_information
+ end
+ -- Evaluate the evaluation results of the semantic analysis.
+ if evaluation_results.num_statement_tokens == nil then
+ goto skip_remaining_additional_information
+ end
+ io.write(string.format("\n\n%s%s", line_indent, colorize("Semantic analysis results:", BOLD)))
+ if evaluation_results.num_statements_total == 0 then
+ io.write(string.format("\n%s- No top-level %s", line_indent, pluralize("statement")))
+ goto skip_remaining_additional_information
+ end
+ for statement_type, num_statement_tokens in pairs_sorted_by_descending_values(evaluation_results.num_statement_tokens) do
+ local num_statements = evaluation_results.num_statements[statement_type]
+ assert(num_statements ~= nil)
+ assert(num_statements > 0)
+ assert(num_statement_tokens ~= nil)
+ assert(num_statement_tokens > 0)
+ io.write(string.format("\n%s- %s top-level ", line_indent, titlecase(humanize(num_statements))))
+ io.write(string.format("%s spanning ", pluralize(statement_type, num_statements)))
+ if num_statement_tokens == num_tokens then
+ io.write("all tokens")
+ else
+ local formatted_statement_tokens = string.format(
+ "%s %s", humanize(num_statement_tokens), pluralize("token", num_statement_tokens))
+ local formatted_token_ratio = format_ratio(num_statement_tokens, num_tokens)
+ if num_expl_bytes == num_total_bytes then
+ io.write(string.format("%s (%s of file size)", formatted_statement_tokens, formatted_token_ratio))
+ else
+ local formatted_byte_ratio = format_ratio(num_expl_bytes * num_statement_tokens, num_total_bytes * num_tokens)
+ io.write(string.format(
+ "%s (%s of tokens, ~%s of file size)", formatted_statement_tokens, formatted_token_ratio, formatted_byte_ratio))
+ end
+ end
+ if statement_type == FUNCTION_DEFINITION and evaluation_results.num_replacement_text_statements_total > 0 then
+ local seen_nested_function_definition = false
+ for nested_statement_type, num_nested_statement_tokens in
+ pairs_sorted_by_descending_values(evaluation_results.num_replacement_text_statement_tokens) do
+ local num_nested_statements = evaluation_results.num_replacement_text_statements[nested_statement_type]
+ local max_nesting_depth = evaluation_results.replacement_text_max_nesting_depth[nested_statement_type]
+ assert(num_nested_statements ~= nil)
+ assert(num_nested_statements > 0)
+ assert(num_nested_statement_tokens ~= nil)
+ assert(num_nested_statement_tokens > 0)
+ assert(max_nesting_depth ~= nil)
+ assert(max_nesting_depth > 0)
+ if nested_statement_type == FUNCTION_DEFINITION then
+ seen_nested_function_definition = true
+ end
+ io.write(string.format("\n%s- %s nested ", line_indent:rep(2), titlecase(humanize(num_nested_statements))))
+ io.write(string.format("%s ", pluralize(nested_statement_type, num_nested_statements)))
+ if max_nesting_depth > 1 and nested_statement_type == FUNCTION_DEFINITION then
+ io.write(string.format("with a maximum nesting depth of %s, ", humanize(max_nesting_depth)))
+ end
+ io.write(string.format(
+ "spanning %s %s", humanize(num_nested_statement_tokens), pluralize("token", num_nested_statement_tokens)
+ ))
+ if max_nesting_depth > 1 and nested_statement_type ~= FUNCTION_DEFINITION then
+ local num_nested_function_definition_statements = evaluation_results.num_replacement_text_statements[FUNCTION_DEFINITION]
+ assert(num_nested_function_definition_statements > 0)
+ io.write(string.format(
+ ", some in %s",
+ add_article(
+ pluralize(string.format("nested %s", FUNCTION_DEFINITION), num_nested_function_definition_statements),
+ num_nested_function_definition_statements,
+ seen_nested_function_definition,
+ false
+ )
+ ))
+ end
+ end
+ end
+ end
+ if evaluation_results.num_statements_total == nil or evaluation_results.num_statements_total == 0 then
+ goto skip_remaining_additional_information
+ end
end
::skip_remaining_additional_information::
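For reference, the expected behaviour of the updated helpers; they are local
to explcheck-format.lua, so this is an illustrative sketch rather than a
usable API:

    -- pluralize() now handles phrases containing " of " and a nil count:
    --   pluralize("call", 1)                          --> "call"
    --   pluralize("call", 2)                          --> "calls"
    --   pluralize("block of other simple tokens", 2)  --> "blocks of other simple tokens"
    --   pluralize("statement")                        --> "statements" (count unspecified)
    -- add_article() prepends "a"/"an" for a single item and leaves plurals bare:
    --   add_article("nested function definition", 1, false, false)
    --                                                 --> "a nested function definition"
    -- format_ratio() now rounds small non-zero ratios to "<1%" instead of ">0%".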
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-lexical-analysis.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-lexical-analysis.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-lexical-analysis.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -16,6 +16,7 @@
local token_types = {
CONTROL_SEQUENCE = "control sequence",
CHARACTER = "character",
+ ARGUMENT = "argument", -- corresponds to zero or more tokens inserted by a function call, never produced by lexical analysis
}
local CONTROL_SEQUENCE = token_types.CONTROL_SEQUENCE
@@ -295,8 +296,9 @@
local next_token_type, next_csname, _, next_range = table.unpack(next_token)
if next_token_type == CONTROL_SEQUENCE then
if (
- lpeg.match(parsers.expl3_function_assignment_csname, csname) ~= nil
+ lpeg.match(parsers.expl3_function_definition_or_assignment_csname, csname) ~= nil
and lpeg.match(parsers.expl3like_csname, next_csname) ~= nil
+ and lpeg.match(parsers.expl3_expansion_csname, next_csname) == nil
and lpeg.match(parsers.expl3_function_csname, next_csname) == nil
) then
issues:add('s205', 'malformed function name', next_range)
@@ -304,6 +306,7 @@
if (
lpeg.match(parsers.expl3_variable_or_constant_use_csname, csname) ~= nil
and lpeg.match(parsers.expl3like_csname, next_csname) ~= nil
+ and lpeg.match(parsers.expl3_expansion_csname, next_csname) == nil
and lpeg.match(parsers.expl3_scratch_variable_csname, next_csname) == nil
and lpeg.match(parsers.expl3_variable_or_constant_csname, next_csname) == nil
) then
@@ -312,6 +315,7 @@
if (
lpeg.match(parsers.expl3_quark_or_scan_mark_definition_csname, csname) ~= nil
and lpeg.match(parsers.expl3_quark_or_scan_mark_csname, next_csname) == nil
+ and lpeg.match(parsers.expl3_expansion_csname, next_csname) == nil
) then
issues:add('s207', 'malformed quark or scan mark name', next_range)
end
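The new expl3_expansion_csname guard means that argument-expansion functions
from l3expan no longer trigger S205-S207 when they follow a definition or use.
A sketch of the parser's behaviour, assuming the modules are on the Lua path:

    local lpeg = require("lpeg")
    local parsers = require("explcheck-parsers")
    -- `\exp_args:Nc` and friends match the new parser and are skipped ...
    assert(lpeg.match(parsers.expl3_expansion_csname, "exp_args:Nc") ~= nil)
    -- ... while ordinary expl3-like names still go through the checks.
    assert(lpeg.match(parsers.expl3_expansion_csname, "tl_use:N") == nil)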
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-obsolete.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-obsolete.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-obsolete.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -9,7 +9,7 @@
-- luacheck: push no max line length
local obsolete = {}
-obsolete.deprecated_csname = (P("p") * (P("d") * (P("f") * (P("_") * (P("o") * (P("b") * (P("j") * (P("e") * (P("c") * (P("t") * (P("_") * (P("new:nn") + P("w") * (P("r") * (P("i") * (P("t") * (P("e") * (P(":") * (P("n") * (P("n") + P("x")))))))))))))))))) + P("e") * (P("e") * (P("k") * (P("_") * (P("m") * (P("e") * (P("a") * (P("n") * (P("i") * (P("n") * (P("g") * (P("_") * (P("remove_ignore_spaces:N") + P("ignore_spaces:N"))))))))) + P("c") * (P("h") * (P("a") * (P("r") * (P("c") * (P("o") * (P("d") * (P("e") * (P("_") * (P("remove_ignore_spaces:N") + P("ignore_spaces:N"))))))))) + P("a") * (P("t") * (P("c") * (P("o") * (P("d") * (P("e") * (P("_") * (P("remove_ignore_spaces:N") + P("ignore_spaces:N"))))))))))))) + P("r") * (P("o") * (P("p") * (P("_") * (P("p") * (P("u") * (P("t") * (P("_") * (P("i") * (P("f") * (P("_") * (P("n") * (P("e") * (P("w") * (P(":") * (P("N") * (P("n") * (P("n") + P("V")) + P("Vn")) + P("c") * (P("n") * (P("n") + P("V")) + P("Vn"))))))))))))) + P("g") * (P("p") * (P("u") * (P("t") * (P("_") * (P("i") * (P("f") * (P("_") * (P("n") * (P("e") * (P("w") * (P(":") * (P("N") * (P("n") * (P("n") + P("V")) + P("Vn")) + P("c") * (P("n") * (P("n") + P("V")) + P("Vn"))))))))))))))))))) + P("i") * (P("o") * (P("w") * (P("_") * (P("s") * (P("h") * (P("i") * (P("p") * (P("o") * (P("u") * (P("t") * (P("_") * (P("x") * (P(":") * (P("N") * (P("n") + P("x")) + P("c") * (P("n") + P("x")))))))))))))))) + P("t") * (P("l") * (P("_") * (P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))) + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))) + P("m") * (P("i") * (P("x") * (P("e") * (P("d") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))) + P("b") * (P("u") * (P("i") * (P("l") * (P("d") * (P("_") * (P("g") * (P("et:NN") + P("clear:N")) + P("clear:N"))))))) + P("c") * (P("a") * (P("s") * (P(!
"e") * (P(":") * (P("N") * (P("n") * (P("TF") + P("F") + P("T")) + P("n")) + P("c") * (P("n") * (P("TF") + P("F") + P("T")) + P("n"))))))))) + P("e") * (P("x") * (P("t") * (P("_") * (P("t") * (P("i") * (P("t") * (P("l") * (P("e") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))))))) + P("s") * (P("t") * (P("r") * (P("_") * (P("declare_eight_bit_encoding:nnn") + P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("f") + P("n")))))))))))) + P("f") * (P("o") * (P("l") * (P("d") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("n") + P("V")))))) + P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("n") + P("V"))))))))))) + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("f") + P("n"))))))))))))))) + P("e") * (P("q") * (P("_") * (P("i") * (P("n") * (P("d") * (P("e") * (P("x") * (P("e") * (P("d") * (P("_") * (P("m") * (P("a") * (P("p") * (P("_") * (P("function:NN") + P("inline:Nn"))))))))))))) + P("set_map_x:NNn") + P("gset_map_x:NNn")))) + P("ys_load_deprecation:")) + P("l") * (P("_") * (P("t") * (P("e") * (P("x") * (P("t") * (P("_") * (P("letterlike_tl") + P("accents_tl")))))) + P("k") * (P("e") * (P("y") * (P("s") * (P("_") * (P("path_tl") + P("key_tl")))))))) + P("m") * (P("s") * (P("g") * (P("_") * (P("g") * (P("s") * (P("e") * (P("t") * (P(":") * (P("n") * (P("n") * (P("nn") + P("n")))))))))))) + P("k") * (P("e") * (P("y") * (P("s") * (P("_") * (P("s") * (P("e") * (P("t") * (P("_") * (P("f") * (P("i") * (P("l") * (P("t") * (P("e") * (P("r") * (P(":") * (P("n") * (P("n") * (P("n") * (P("nN") + P("N")) + P("v") * (P("nN") + P("N")) + P("V") * (P("nN") + P("N")) + P("o") * (P("nN") + P("N")) + P("n") + P("V") + P("v") + P("o"))))))))))))))))))) + P("c") * (P("h") * (P("a") * (P("r") * (P("_") * (P("f") * (P("o") * (P("l") * (P("d") * (P("case:N") + P("_case:N"))))) + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))!
) + P("t") * (P("o") * (P("_") * (P("nfd:N") + P("utfviii_bytes:n"))) + P("itlecase:N")) + P("mixed_case:N") + P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))) + P("s") * (P("t") * (P("r") * (P("_") * (P("f") * (P("o") * (P("l") * (P("d") * (P("case:N") + P("_case:N"))))) + P("titlecase:N") + P("mixed_case:N") + P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))) + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))))))))))) + P("s_argument_spec:N"))) * eof
+obsolete.deprecated_csname = (P("t") * (P("l") * (P("_") * (P("b") * (P("u") * (P("i") * (P("l") * (P("d") * (P("_") * (P("g") * (P("et:NN") + P("clear:N")) + P("clear:N"))))))) + P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))) + P("m") * (P("i") * (P("x") * (P("e") * (P("d") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))) + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))) + P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("N") * (P("n") * (P("TF") + P("F") + P("T")) + P("n")) + P("c") * (P("n") * (P("TF") + P("F") + P("T")) + P("n"))))))))) + P("e") * (P("x") * (P("t") * (P("_") * (P("t") * (P("i") * (P("t") * (P("l") * (P("e") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("nn") + P("n")))))))))))))))) + P("i") * (P("o") * (P("w") * (P("_") * (P("s") * (P("h") * (P("i") * (P("p") * (P("o") * (P("u") * (P("t") * (P("_") * (P("x") * (P(":") * (P("N") * (P("n") + P("x")) + P("c") * (P("n") + P("x")))))))))))))))) + P("p") * (P("r") * (P("o") * (P("p") * (P("_") * (P("p") * (P("u") * (P("t") * (P("_") * (P("i") * (P("f") * (P("_") * (P("n") * (P("e") * (P("w") * (P(":") * (P("N") * (P("n") * (P("n") + P("V")) + P("Vn")) + P("c") * (P("n") * (P("n") + P("V")) + P("Vn"))))))))))))) + P("g") * (P("p") * (P("u") * (P("t") * (P("_") * (P("i") * (P("f") * (P("_") * (P("n") * (P("e") * (P("w") * (P(":") * (P("N") * (P("n") * (P("n") + P("V")) + P("Vn")) + P("c") * (P("n") * (P("n") + P("V")) + P("Vn")))))))))))))))))) + P("e") * (P("e") * (P("k") * (P("_") * (P("m") * (P("e") * (P("a") * (P("n") * (P("i") * (P("n") * (P("g") * (P("_") * (P("remove_ignore_spaces:N") + P("ignore_spaces:N"))))))))) + P("c") * (P("h") * (P("a") * (P("r") * (P("c") * (P("o") * (P("d") * (P("e") * (P("_") * (P("remove_ignore_spaces:N") + P("ignore_spaces:N"))))))))) + P("a") * (P("t") * (P("c") * (P("o") *!
(P("d") * (P("e") * (P("_") * (P("remove_ignore_spaces:N") + P("ignore_spaces:N"))))))))))))) + P("d") * (P("f") * (P("_") * (P("o") * (P("b") * (P("j") * (P("e") * (P("c") * (P("t") * (P("_") * (P("new:nn") + P("w") * (P("r") * (P("i") * (P("t") * (P("e") * (P(":") * (P("n") * (P("n") + P("x"))))))))))))))))))) + P("s") * (P("e") * (P("q") * (P("_") * (P("i") * (P("n") * (P("d") * (P("e") * (P("x") * (P("e") * (P("d") * (P("_") * (P("m") * (P("a") * (P("p") * (P("_") * (P("function:NN") + P("inline:Nn"))))))))))))) + P("set_map_x:NNn") + P("gset_map_x:NNn")))) + P("t") * (P("r") * (P("_") * (P("f") * (P("o") * (P("l") * (P("d") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("n") + P("V")))))) + P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("n") + P("V"))))))))))) + P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("f") + P("n")))))))))))) + P("declare_eight_bit_encoding:nnn") + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("_") * (P("c") * (P("a") * (P("s") * (P("e") * (P(":") * (P("f") + P("n"))))))))))))))) + P("ys_load_deprecation:")) + P("k") * (P("e") * (P("y") * (P("s") * (P("_") * (P("s") * (P("e") * (P("t") * (P("_") * (P("f") * (P("i") * (P("l") * (P("t") * (P("e") * (P("r") * (P(":") * (P("n") * (P("n") * (P("V") * (P("nN") + P("N")) + P("n") * (P("nN") + P("N")) + P("v") * (P("nN") + P("N")) + P("o") * (P("nN") + P("N")) + P("n") + P("V") + P("v") + P("o"))))))))))))))))))) + P("m") * (P("s") * (P("g") * (P("_") * (P("g") * (P("s") * (P("e") * (P("t") * (P(":") * (P("n") * (P("n") * (P("nn") + P("n")))))))))))) + P("l") * (P("_") * (P("t") * (P("e") * (P("x") * (P("t") * (P("_") * (P("letterlike_tl") + P("accents_tl")))))) + P("k") * (P("e") * (P("y") * (P("s") * (P("_") * (P("path_tl") + P("key_tl")))))))) + P("c") * (P("h") * (P("a") * (P("r") * (P("_") * (P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))) + P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("case:N") + P("_c!
ase:N")))))) + P("f") * (P("o") * (P("l") * (P("d") * (P("case:N") + P("_case:N"))))) + P("mixed_case:N") + P("t") * (P("o") * (P("_") * (P("nfd:N") + P("utfviii_bytes:n"))) + P("itlecase:N")) + P("s") * (P("t") * (P("r") * (P("_") * (P("u") * (P("p") * (P("p") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))) + P("f") * (P("o") * (P("l") * (P("d") * (P("case:N") + P("_case:N"))))) + P("mixed_case:N") + P("titlecase:N") + P("l") * (P("o") * (P("w") * (P("e") * (P("r") * (P("case:N") + P("_case:N")))))))))))))) + P("s_argument_spec:N"))) * eof
-- luacheck: pop
return obsolete
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-parsers.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-parsers.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-parsers.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -1,7 +1,7 @@
-- Common LPEG parsers used by different modules of the static analyzer explcheck.
local lpeg = require("lpeg")
-local C, Cp, Cs, Ct, Cmt, P, R, S = lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.P, lpeg.R, lpeg.S
+local C, Cc, Cp, Cs, Ct, Cmt, P, R, S = lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.P, lpeg.R, lpeg.S
-- Base parsers
---- Generic
@@ -168,6 +168,14 @@
local parameter_argument_specifier = S("p")
local weird_argument_specifier = S("w")
local do_not_use_argument_specifier = S("D")
+local N_or_n_type_argument_specifier = (
+ N_type_argument_specifier
+ + n_type_argument_specifier
+)
+local N_or_n_type_argument_specifiers = (
+ N_or_n_type_argument_specifier^0
+ * eof
+)
local argument_specifier = (
N_type_argument_specifier
+ n_type_argument_specifier
@@ -175,7 +183,10 @@
+ weird_argument_specifier
+ do_not_use_argument_specifier
)
-local argument_specifiers = argument_specifier^0 * eof
+local argument_specifiers = (
+ argument_specifier^0
+ * eof
+)
local do_not_use_argument_specifiers = (
(
argument_specifier
@@ -253,7 +264,7 @@
* eof
)
local expl3_scratch_variable_csname = (
- P("l")
+ S("gl")
* underscore
* P("tmp") * S("ab")
* underscore
@@ -489,9 +500,24 @@
* latex_style_file_csname
)
+---- Argument expansion functions from the module l3expan
+local expl3_expansion_csname = (
+ P("exp")
+ * underscore
+ * letter * (letter + underscore)^0
+ * colon
+)
+
---- Assigning functions
-local expl3_function_assignment_csname = (
- P("cs_")
+local expl3_function_definition_csname = Ct(
+ P("cs_new")
+ * (P("_protected") * Cc(true) + Cc(false))
+ * (P("_nopar") * Cc(true) + Cc(false))
+ * P(":N")
+)
+local expl3_function_definition_or_assignment_csname = (
+ P("cs")
+ * underscore
* (
(
P("new")
@@ -508,17 +534,40 @@
* P(":N")
)
+---- Function calls with Lua arguments
+local expl3_function_call_with_lua_code_argument_csname = Ct(
+ P("lua")
+ * underscore
+ * (
+ P("now")
+ + P("shipout")
+ )
+ * colon
+ * S("noex")
+ * eof
+ * Cc(1)
+ + success
+)
+
---- Using variables/constants
local expl3_variable_or_constant_use_csname = (
expl3_variable_or_constant_type
- * P("_")
+ * underscore
* (
P("const")
+ P("new")
+ P("g")^-1
* P("set")
- * P("_eq")^-1
+ * (
+ underscore
+ * (
+ P("eq")
+ + P("true")
+ + P("false")
+ )
+ )^-1
+ P("use")
+ + P("show")
)
* P(":N")
)
@@ -532,7 +581,10 @@
* P("_new:N")
* eof
)
-local expl3_quark_or_scan_mark_csname = S("qs") * P("_")
+local expl3_quark_or_scan_mark_csname = (
+ S("qs")
+ * underscore
+)
return {
any = any,
@@ -547,7 +599,10 @@
eof = eof,
expl3_catcodes = expl3_catcodes,
expl3_endlinechar = expl3_endlinechar,
- expl3_function_assignment_csname = expl3_function_assignment_csname,
+ expl3_expansion_csname = expl3_expansion_csname,
+ expl3_function_definition_csname = expl3_function_definition_csname,
+ expl3_function_definition_or_assignment_csname = expl3_function_definition_or_assignment_csname,
+ expl3_function_call_with_lua_code_argument_csname = expl3_function_call_with_lua_code_argument_csname,
expl3_function_csname = expl3_function_csname,
expl3like_csname = expl3like_csname,
expl3like_material = expl3like_material,
@@ -563,6 +618,7 @@
latex_style_file_content = latex_style_file_content,
linechar = linechar,
newline = newline,
+ N_or_n_type_argument_specifiers = N_or_n_type_argument_specifiers,
n_type_argument_specifier = n_type_argument_specifier,
N_type_argument_specifier = N_type_argument_specifier,
parameter_argument_specifier = parameter_argument_specifier,
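The new expl3_function_definition_csname parser captures the properties of the
defined function as a table of two booleans (protected, nopar). A usage
sketch, assuming the module is on the Lua path:

    local lpeg = require("lpeg")
    local parsers = require("explcheck-parsers")
    local function_definition = lpeg.match(parsers.expl3_function_definition_csname, "cs_new_protected:Npn")
    local protected, nopar = table.unpack(function_definition)
    assert(protected == true and nopar == false)
    -- Assignments that are not definitions produce no capture:
    assert(lpeg.match(parsers.expl3_function_definition_csname, "cs_set:Npn") == nil)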
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-ranges.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-ranges.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-ranges.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -129,18 +129,22 @@
end
end
--- Get an iterator over the range.
-function Range:iter()
+-- Get an iterator over pairs of indices and items from the original array within the range.
+function Range:enumerate(original_array)
if #self == 0 then
return function() -- empty range
return nil
end
else
+ assert(self:start() >= 1)
+ assert(self:start() <= #original_array)
+ assert(self:stop() >= self:start())
+ assert(self:stop() <= #original_array)
local i = self:start() - 1
return function() -- non-empty range
i = i + 1
if i <= self:stop() then
- return i
+ return i, original_array[i]
else
return nil
end
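Range:iter() becomes Range:enumerate(), which yields both the index and the
item from the original array. A usage sketch mirroring how classify_tokens()
in the new semantic analysis module consumes it (variable names assumed):

    -- Assuming `token_range` covers a span of `tokens`:
    for index, token in token_range:enumerate(tokens) do
      local catcode = token[3]  -- each token is a table; field 3 is its catcode
      print(index, catcode)
    end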
Added: trunk/Master/texmf-dist/scripts/expltools/explcheck-semantic-analysis.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-semantic-analysis.lua (rev 0)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-semantic-analysis.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -0,0 +1,227 @@
+-- The semantic analysis step of static analysis determines the meaning of the different function calls.
+
+local token_types = require("explcheck-lexical-analysis").token_types
+local syntactic_analysis = require("explcheck-syntactic-analysis")
+local ranges = require("explcheck-ranges")
+local parsers = require("explcheck-parsers")
+local identity = require("explcheck-utils").identity
+
+local ARGUMENT = token_types.ARGUMENT
+
+local new_range = ranges.new_range
+local range_flags = ranges.range_flags
+
+local INCLUSIVE = range_flags.INCLUSIVE
+local MAYBE_EMPTY = range_flags.MAYBE_EMPTY
+
+local call_types = syntactic_analysis.call_types
+local get_calls = syntactic_analysis.get_calls
+local transform_replacement_text_tokens = syntactic_analysis.transform_replacement_text_tokens
+
+local CALL = call_types.CALL
+local OTHER_TOKENS = call_types.OTHER_TOKENS
+
+local lpeg = require("lpeg")
+
+local statement_types = {
+ FUNCTION_DEFINITION = "function definition",
+ OTHER_STATEMENT = "other statement",
+ OTHER_TOKENS_SIMPLE = "block of other simple tokens",
+ OTHER_TOKENS_COMPLEX = "block of other complex tokens",
+}
+
+local FUNCTION_DEFINITION = statement_types.FUNCTION_DEFINITION
+local OTHER_STATEMENT = statement_types.OTHER_STATEMENT
+local OTHER_TOKENS_SIMPLE = statement_types.OTHER_TOKENS_SIMPLE
+local OTHER_TOKENS_COMPLEX = statement_types.OTHER_TOKENS_COMPLEX
+
+local simple_text_catcodes = {
+ [3] = true, -- math shift
+ [4] = true, -- alignment tab
+ [5] = true, -- end of line
+ [7] = true, -- superscript
+ [8] = true, -- subscript
+ [9] = true, -- ignored character
+ [10] = true, -- space
+ [11] = true, -- letter
+ [12] = true, -- other
+}
+
+-- Determine the meaning of function calls and register any issues.
+local function semantic_analysis(pathname, content, issues, results, options) -- luacheck: ignore pathname options
+
+ -- Determine the type of a span of tokens: either "simple text" [1, p. 383] with no expected side effects, or
+ -- more complex material that may have side effects and that presents a boundary between chunks of well-understood
+ -- expl3 statements.
+ --
+ -- [1]: Donald Ervin Knuth. 1986. TeX: The Program. Addison-Wesley, USA.
+ --
+ local function classify_tokens(tokens, token_range)
+ for _, token in token_range:enumerate(tokens) do
+ local catcode = token[3]
+ if simple_text_catcodes[catcode] == nil then
+ return OTHER_TOKENS_COMPLEX
+ end
+ end
+ return OTHER_TOKENS_SIMPLE
+ end
+
+ -- Extract statements from function calls and record them. For all identified function definitions, also record replacement texts.
+ local function record_statements_and_replacement_texts(tokens, transformed_tokens, calls, first_map_back, first_map_forward)
+ local statements = {}
+ local replacement_text_tokens = {}
+ for _, call in ipairs(calls) do
+ local call_type, token_range = table.unpack(call)
+ local statement
+ if call_type == CALL then -- a function call
+ local _, _, csname, arguments = table.unpack(call)
+ -- ignore error S204 (Missing stylistic whitespaces) in Lua code
+ for _, arguments_number in ipairs(lpeg.match(parsers.expl3_function_call_with_lua_code_argument_csname, csname)) do
+ local _, lua_code_token_range = table.unpack(arguments[arguments_number])
+ if #lua_code_token_range > 0 then
+ local lua_code_byte_range = new_range(
+ tokens[lua_code_token_range:start()][4]:start(),
+ tokens[lua_code_token_range:stop()][4]:stop(),
+ INCLUSIVE,
+ #content
+ )
+ issues:ignore('s204', lua_code_byte_range)
+ end
+ end
+ local function_definition = lpeg.match(parsers.expl3_function_definition_csname, csname)
+ if function_definition ~= nil then -- function definition
+ local protected, nopar = table.unpack(function_definition) -- determine properties of the defined function
+ -- determine the replacement text
+ local replacement_text_specifier, replacement_text_token_range = table.unpack(arguments[#arguments])
+ if replacement_text_specifier ~= "n" then -- replacement text is hidden behind expansion, give up
+ goto other_statement
+ end
+ -- determine the name of the defined function
+ local defined_csname_specifier, defined_csname_token_range = table.unpack(arguments[1])
+ assert(defined_csname_specifier == "N" and #defined_csname_token_range == 1)
+ local defined_csname_token_type, defined_csname
+ = table.unpack(transformed_tokens[first_map_forward(defined_csname_token_range:start())])
+ if defined_csname_token_type == ARGUMENT then -- name is hidden behind an argument, give up
+ goto other_statement
+ end
+ assert(defined_csname ~= nil)
+ -- determine the number of parameters of the defined function
+ local num_parameters
+ local _, _, argument_specifiers = defined_csname:find(":([^:]*)") -- first, parse the name of the defined function
+ if argument_specifiers ~= nil and lpeg.match(parsers.N_or_n_type_argument_specifiers, argument_specifiers) ~= nil then
+ num_parameters = #argument_specifiers
+ end
+ for _, argument in ipairs(arguments) do -- next, try to look for p-type "TeX parameter" argument specifiers
+ if lpeg.match(parsers.parameter_argument_specifier, argument[1]) and argument[3] ~= nil then
+ if num_parameters == nil or argument[3] > num_parameters then -- if one method gives a higher number, trust it
+ num_parameters = argument[3]
+ end
+ assert(num_parameters ~= nil)
+ break
+ end
+ end
+ if num_parameters == nil then -- we couldn't determine the number of parameters, give up
+ goto other_statement
+ end
+ -- parse the replacement text and record the function definition
+ local mapped_replacement_text_token_range = new_range(
+ first_map_forward(replacement_text_token_range:start()),
+ first_map_forward(replacement_text_token_range:stop()),
+ INCLUSIVE + MAYBE_EMPTY,
+ #transformed_tokens
+ )
+ local doubly_transformed_tokens, second_map_back, second_map_forward
+ = transform_replacement_text_tokens(content, transformed_tokens, issues, num_parameters, mapped_replacement_text_token_range)
+ if doubly_transformed_tokens == nil then -- we couldn't parse the replacement text, give up
+ goto other_statement
+ end
+ local function map_back(...) return first_map_back(second_map_back(...)) end
+ local function map_forward(...) return second_map_forward(first_map_forward(...)) end
+ table.insert(replacement_text_tokens, {replacement_text_token_range, doubly_transformed_tokens, map_back, map_forward})
+ statement = {FUNCTION_DEFINITION, protected, nopar, #replacement_text_tokens}
+ goto continue
+ end
+ ::other_statement::
+ statement = {OTHER_STATEMENT}
+ ::continue::
+ elseif call_type == OTHER_TOKENS then -- other tokens
+ local statement_type = classify_tokens(tokens, token_range)
+ statement = {statement_type}
+ else
+ error('Unexpected call type "' .. call_type .. '"')
+ end
+ table.insert(statements, statement)
+ end
+ assert(#statements == #calls)
+ return statements, replacement_text_tokens
+ end
+
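Each recorded statement is a small array-like table whose first element is the statement type. A minimal, self-contained dispatch sketch; the concrete string behind FUNCTION_DEFINITION is defined earlier in the file and is a placeholder below:

    local FUNCTION_DEFINITION = "function definition"  -- placeholder value
    local statements = {
      {FUNCTION_DEFINITION, true, false, 1},  -- protected, not nopar, replacement text #1
      {"other statement"},                    -- placeholder for OTHER_STATEMENT
    }
    for _, statement in ipairs(statements) do
      if statement[1] == FUNCTION_DEFINITION then
        local _, protected, nopar, replacement_text_number = table.unpack(statement)
        print(protected, nopar, replacement_text_number)
      end
    end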
+ -- Extract statements from function calls. For all identified function definitions, record replacement texts and recursively
+  -- apply syntactic and semantic analysis to them.
+ local function get_statements(tokens, groupings, calls)
+
+ -- First, record top-level statements.
+ local replacement_texts = {tokens = nil, calls = {}, statements = {}, nesting_depth = {}}
+ local statements
+ statements, replacement_texts.tokens = record_statements_and_replacement_texts(tokens, tokens, calls, identity, identity)
+
+ -- Then, process any new replacement texts until convergence.
+ local previous_num_replacement_texts = 0
+ local current_num_replacement_texts = #replacement_texts.tokens
+ local current_nesting_depth = 1
+ while previous_num_replacement_texts < current_num_replacement_texts do
+ for replacement_text_number = previous_num_replacement_texts + 1, current_num_replacement_texts do
+ local replacement_text_tokens = replacement_texts.tokens[replacement_text_number]
+ local replacement_text_token_range, transformed_tokens, map_back, map_forward = table.unpack(replacement_text_tokens)
+ -- record the current nesting depth with the replacement text
+ table.insert(replacement_texts.nesting_depth, current_nesting_depth)
+ -- extract nested calls from the replacement text using syntactic analysis
+ local nested_calls
+ = get_calls(tokens, transformed_tokens, replacement_text_token_range, map_back, map_forward, issues, groupings)
+ table.insert(replacement_texts.calls, nested_calls)
+      -- extract nested statements and replacement texts from the nested calls using semantic analysis
+ local nested_statements, nested_replacement_text_tokens
+ = record_statements_and_replacement_texts(tokens, transformed_tokens, nested_calls, map_back, map_forward)
+ for _, nested_statement in ipairs(nested_statements) do
+ if nested_statement[1] == FUNCTION_DEFINITION then
+ -- make the reference to the replacement text absolute instead of relative
+ nested_statement[#nested_statement] = nested_statement[#nested_statement] + current_num_replacement_texts
+ end
+ end
+ table.insert(replacement_texts.statements, nested_statements)
+ for _, nested_tokens in ipairs(nested_replacement_text_tokens) do
+ table.insert(replacement_texts.tokens, nested_tokens)
+ end
+ end
+ previous_num_replacement_texts = current_num_replacement_texts
+ current_num_replacement_texts = #replacement_texts.tokens
+ current_nesting_depth = current_nesting_depth + 1
+ end
+
+ assert(#replacement_texts.tokens == current_num_replacement_texts)
+ assert(#replacement_texts.calls == current_num_replacement_texts)
+ assert(#replacement_texts.statements == current_num_replacement_texts)
+ assert(#replacement_texts.nesting_depth == current_num_replacement_texts)
+
+ return statements, replacement_texts
+ end
+
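The loop in get_statements is a fixed-point (worklist) iteration: every pass may discover replacement texts nested one level deeper, and the loop ends once a pass adds nothing new. A minimal self-contained sketch of the same pattern:

    local items = {"outer"}
    local function process(item)
      if item == "outer" then
        table.insert(items, "inner")  -- a newly discovered nested item
      end
    end
    local previous, current = 0, #items
    while previous < current do
      for item_number = previous + 1, current do
        process(items[item_number])
      end
      previous, current = current, #items
    end
    assert(#items == 2)  -- "inner" was found and processed; nothing new appeared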
+ local statements = {}
+ local replacement_texts = {}
+ for part_number, part_calls in ipairs(results.calls) do
+ local part_tokens = results.tokens[part_number]
+ local part_groupings = results.groupings[part_number]
+ local part_statements, part_replacement_texts = get_statements(part_tokens, part_groupings, part_calls)
+ table.insert(statements, part_statements)
+ table.insert(replacement_texts, part_replacement_texts)
+ end
+
+ -- Store the intermediate results of the analysis.
+ results.statements = statements
+ results.replacement_texts = replacement_texts
+end
+
+return {
+ process = semantic_analysis,
+ statement_types = statement_types,
+}
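A hypothetical driver sketch for the new module; `pathname`, `content`, `issues`, `results`, and `options` are assumed to come out of the preceding explcheck processing steps:

    local semantic_analysis = require("explcheck-semantic-analysis")
    semantic_analysis.process(pathname, content, issues, results, options)
    -- results.statements and results.replacement_texts now hold the
    -- statements and replacement texts for each expl3 part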
Property changes on: trunk/Master/texmf-dist/scripts/expltools/explcheck-semantic-analysis.lua
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-syntactic-analysis.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-syntactic-analysis.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-syntactic-analysis.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -1,7 +1,9 @@
-- The syntactic analysis step of static analysis converts TeX tokens into a tree of function calls.
+local token_types = require("explcheck-lexical-analysis").token_types
local ranges = require("explcheck-ranges")
local parsers = require("explcheck-parsers")
+local identity = require("explcheck-utils").identity
local new_range = ranges.new_range
local range_flags = ranges.range_flags
@@ -10,64 +12,179 @@
local INCLUSIVE = range_flags.INCLUSIVE
local MAYBE_EMPTY = range_flags.MAYBE_EMPTY
+local CONTROL_SEQUENCE = token_types.CONTROL_SEQUENCE
+local CHARACTER = token_types.CHARACTER
+local ARGUMENT = token_types.ARGUMENT
+
local lpeg = require("lpeg")
local call_types = {
- CALL = "call",
- OTHER_TOKENS = "other tokens",
+ CALL = "expl3 call",
+ OTHER_TOKENS = "block of other tokens",
}
local CALL = call_types.CALL
local OTHER_TOKENS = call_types.OTHER_TOKENS
--- Convert the content to a tree of function calls an register any issues.
-local function syntactic_analysis(pathname, content, issues, results, options) -- luacheck: ignore pathname content options
+-- Transform parameter tokens in a replacement text.
+local function transform_replacement_text_tokens(content, tokens, issues, num_parameters, replacement_text_token_range)
+ local deleted_token_numbers, transformed_tokens = {}, {}
+ if #replacement_text_token_range == 0 then
+ return transformed_tokens, identity, identity
+ end
- local token_types = require("explcheck-lexical-analysis").token_types
+ local token_number = replacement_text_token_range:start()
+ while token_number <= replacement_text_token_range:stop() do
+ local token = tokens[token_number]
+ local token_type, _, catcode, byte_range = table.unpack(token)
+ local next_token_number = token_number + 1
+ if token_type == CHARACTER and catcode == 6 then -- parameter
+ if next_token_number > replacement_text_token_range:stop() then -- not followed by anything, the replacement text is invalid
+ return nil
+ end
+ local next_token = tokens[next_token_number]
+ local next_token_type, next_payload, next_catcode, next_byte_range
+ = table.unpack(next_token)
+ if next_token_type == CHARACTER and next_catcode == 6 then -- followed by another parameter, remove one of the tokens
+ local transformed_token = {CHARACTER, next_payload, 6, new_range(byte_range:start(), next_byte_range:stop(), INCLUSIVE, #content)}
+ table.insert(transformed_tokens, transformed_token)
+ table.insert(deleted_token_numbers, next_token_number)
+ next_token_number = next_token_number + 1
+ elseif next_token_type == CHARACTER and lpeg.match(parsers.decimal_digit, next_payload) then -- followed by a digit
+ local next_digit = tonumber(next_payload)
+ assert(next_digit ~= nil)
+ if next_digit <= num_parameters then -- a correct digit, remove it and replace the parameter with a function call argument
+ local transformed_token = {ARGUMENT, nil, nil, new_range(byte_range:start(), next_byte_range:stop(), INCLUSIVE, #content)}
+ table.insert(transformed_tokens, transformed_token)
+ table.insert(deleted_token_numbers, next_token_number)
+ next_token_number = next_token_number + 1
+ else -- an incorrect digit, the replacement text is invalid
+ issues:add('e304', 'unexpected parameter number', next_byte_range)
+ return nil
+ end
+ elseif next_token_type == ARGUMENT then -- followed by a function call argument
+ -- the argument could be a correct digit, so let's remove it and replace it with another function call argument
+ local transformed_token = {ARGUMENT, nil, nil, new_range(byte_range:start(), next_byte_range:stop(), INCLUSIVE, #content)}
+ table.insert(transformed_tokens, transformed_token)
+ table.insert(deleted_token_numbers, next_token_number)
+ next_token_number = next_token_number + 1
+ else -- followed by some other token, the replacement text is invalid
+ return nil
+ end
+ else -- not a parameter, copy it unchanged
+ table.insert(transformed_tokens, token)
+ end
+ token_number = next_token_number
+ end
- local CONTROL_SEQUENCE = token_types.CONTROL_SEQUENCE
- local CHARACTER = token_types.CHARACTER
+ -- Transform indexes in the transformed tokens to indexes in the original tokens.
+ local token_number_offset = replacement_text_token_range:start() - 1
- -- Extract function calls from TeX tokens and groupings.
- local function get_calls(tokens, token_range, groupings)
- local calls = {}
- if #token_range == 0 then
- return calls
+ local function map_back(transformed_token_number)
+ assert(transformed_token_number >= 1)
+ assert(transformed_token_number <= #transformed_tokens)
+ local original_token_number = transformed_token_number + token_number_offset
+ for _, deleted_token_number in ipairs(deleted_token_numbers) do
+ if deleted_token_number > original_token_number then
+ break
+ end
+ original_token_number = original_token_number + 1
end
+ return original_token_number
+ end
- local token_number = token_range:start()
+ -- Transform indexes in the original tokens to indexes in the transformed tokens.
+ local function map_forward(original_token_number)
+ assert(original_token_number >= 1)
+ assert(original_token_number <= #tokens)
+ local transformed_token_number = original_token_number
+ for _, deleted_token_number in ipairs(deleted_token_numbers) do
+ if deleted_token_number > original_token_number then
+ break
+ end
+ transformed_token_number = transformed_token_number - 1
+ end
+ return transformed_token_number - token_number_offset
+ end
- -- Record a range of unrecognized tokens.
- local function record_other_tokens(other_token_range)
- local previous_call = #calls > 0 and calls[#calls] or nil
- if previous_call == nil or previous_call[1] ~= OTHER_TOKENS then -- record a new span of other tokens between calls
- table.insert(calls, {OTHER_TOKENS, other_token_range})
- else -- extend the previous span of other tokens
- assert(previous_call[1] == OTHER_TOKENS)
- assert(previous_call[2]:stop() == other_token_range:start() - 1)
- previous_call[2] = new_range(previous_call[2]:start(), other_token_range:stop(), INCLUSIVE, #tokens)
+ return transformed_tokens, map_back, map_forward
+end
+
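The two returned maps translate token indexes between the original and transformed lists, which become shorter whenever `##` collapses into a single parameter token or `#<digit>` becomes a single ARGUMENT token. A hypothetical illustration:

    -- original:    #  #  x  #  1    (5 tokens; tokens 2 and 5 get deleted)
    -- transformed: #  x  ARGUMENT   (3 tokens)
    -- map_forward(3) == 2  -- the original `x` in the transformed list
    -- map_back(2)    == 3  -- and back again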
+-- Extract function calls from TeX tokens and groupings.
+local function get_calls(tokens, transformed_tokens, token_range, map_back, map_forward, issues, groupings)
+ local calls = {}
+ if #token_range == 0 then
+ return calls
+ end
+
+ local token_number = map_forward(token_range:start())
+ local transformed_token_range_end = map_forward(token_range:stop())
+
+ -- Record a range of unrecognized tokens.
+ local function record_other_tokens(other_token_range) -- the range is in tokens, not transformed_tokens
+ local previous_call = #calls > 0 and calls[#calls] or nil
+ if previous_call == nil or previous_call[1] ~= OTHER_TOKENS then -- record a new span of other tokens between calls
+ table.insert(calls, {OTHER_TOKENS, other_token_range})
+ else -- extend the previous span of other tokens
+ assert(previous_call[1] == OTHER_TOKENS)
+ previous_call[2] = new_range(previous_call[2]:start(), other_token_range:stop(), INCLUSIVE, #tokens)
+ end
+ end
+
+ -- Count the number of parameters in a parameter text.
+ local function count_parameters_in_parameter_text(parameter_text_token_range) -- the range is in transformed_tokens, not tokens
+ local num_parameters = 0
+ for token_number, token in parameter_text_token_range:enumerate(transformed_tokens) do -- luacheck: ignore token_number
+ local token_type, _, catcode = table.unpack(token)
+ if token_type == CHARACTER and catcode == 6 then -- parameter
+ local next_token_number = token_number + 1
+ if next_token_number > parameter_text_token_range:stop() then -- not followed by anything, the parameter text is invalid
+ return nil
+ end
+ local next_token = transformed_tokens[next_token_number]
+ local next_token_type, next_payload, next_catcode, next_byte_range = table.unpack(next_token)
+ if next_token_type == CHARACTER and next_catcode == 6 then -- followed by another parameter (unrecognized nesting?)
+ return nil -- the text is invalid
+ elseif next_token_type == CHARACTER and lpeg.match(parsers.decimal_digit, next_payload) then -- followed by a digit
+ local next_digit = tonumber(next_payload)
+ assert(next_digit ~= nil)
+ if next_digit == num_parameters + 1 then -- a correct digit, increment the number of parameters
+ num_parameters = num_parameters + 1
+ else -- an incorrect digit, the parameter text is invalid
+ issues:add('e304', 'unexpected parameter number', next_byte_range)
+ return nil
+ end
+ elseif next_token_type == ARGUMENT then -- followed by a function call argument
+ -- the argument could be a correct digit, so let's increment the number of parameters
+ num_parameters = num_parameters + 1
+ else -- followed by some other token, the parameter text is invalid
+ return nil
+ end
end
end
+ return num_parameters
+ end
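In other words, a parameter text is only valid when its parameters are numbered consecutively from 1. A hypothetical pair of inputs:

    -- \cs_set:Npn \foo #1#2 { ... }  -- parameter text `#1#2`: num_parameters == 2
    -- \cs_set:Npn \foo #1#3 { ... }  -- `#1#3`: the 3 is not num_parameters + 1,
    --                                -- so error E304 (unexpected parameter number) fires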
- -- Normalize common non-expl3 commands to expl3 equivalents.
- local function normalize_csname(csname)
- local next_token_number = token_number + 1
- local normalized_csname = csname
- local ignored_token_number
+ -- Normalize common non-expl3 commands to expl3 equivalents.
+ local function normalize_csname(csname)
+ local next_token_number = token_number + 1
+ local normalized_csname = csname
+ local ignored_token_number
- if csname == "let" then -- \let
- if token_number + 1 <= token_range:stop() then
- if tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- followed by a control sequence
- if token_number + 2 <= token_range:stop() then
- if tokens[token_number + 2][1] == CONTROL_SEQUENCE then -- followed by another control sequence
- normalized_csname = "cs_set_eq:NN" -- \let \csname \csname
- elseif tokens[token_number + 2][1] == CHARACTER then -- followed by a character
- if tokens[token_number + 2][2] == "=" then -- that is an equal sign
- if token_number + 3 <= token_range:stop() then
- if tokens[token_number + 3][1] == CONTROL_SEQUENCE then -- followed by another control sequence
- ignored_token_number = token_number + 2
- normalized_csname = "cs_set_eq:NN" -- \let \csname = \csname
- end
+ if csname == "directlua" then -- \directlua
+ normalized_csname = "lua_now:e"
+ elseif csname == "let" then -- \let
+ if token_number + 1 <= token_range:stop() then
+ if transformed_tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- followed by a control sequence
+ if token_number + 2 <= token_range:stop() then
+ if transformed_tokens[token_number + 2][1] == CONTROL_SEQUENCE then -- followed by another control sequence
+ normalized_csname = "cs_set_eq:NN" -- \let \csname \csname
+ elseif transformed_tokens[token_number + 2][1] == CHARACTER then -- followed by a character
+ if transformed_tokens[token_number + 2][2] == "=" then -- that is an equal sign
+ if token_number + 3 <= token_range:stop() then
+ if transformed_tokens[token_number + 3][1] == CONTROL_SEQUENCE then -- followed by another control sequence
+ ignored_token_number = token_number + 2
+ normalized_csname = "cs_set_eq:NN" -- \let \csname = \csname
end
end
end
@@ -74,43 +191,43 @@
end
end
end
- elseif csname == "def" or csname == "gdef" or csname == "edef" or csname == "xdef" then -- \?def
- if token_number + 1 <= token_range:stop() then
- if tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- followed by a control sequence
- if csname == "def" then -- \def \csname
- normalized_csname = "cs_set:Npn"
- elseif csname == "gdef" then -- \gdef \csname
- normalized_csname = "cs_gset:Npn"
- elseif csname == "edef" then -- \edef \csname
- normalized_csname = "cs_set:Npe"
- elseif csname == "xdef" then -- \xdef \csname
- normalized_csname = "cs_set:Npx"
- else
- assert(false, csname)
- end
+ end
+ elseif csname == "def" or csname == "gdef" or csname == "edef" or csname == "xdef" then -- \?def
+ if token_number + 1 <= token_range:stop() then
+ if transformed_tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- followed by a control sequence
+ if csname == "def" then -- \def \csname
+ normalized_csname = "cs_set:Npn"
+ elseif csname == "gdef" then -- \gdef \csname
+ normalized_csname = "cs_gset:Npn"
+ elseif csname == "edef" then -- \edef \csname
+ normalized_csname = "cs_set:Npe"
+ elseif csname == "xdef" then -- \xdef \csname
+ normalized_csname = "cs_set:Npx"
+ else
+ assert(false, csname)
end
end
- elseif csname == "global" then -- \global
- next_token_number = next_token_number + 1
- assert(next_token_number == token_number + 2)
- if token_number + 1 <= token_range:stop() then
- if tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- followed by a control sequence
- csname = tokens[token_number + 1][2]
- if csname == "let" then -- \global \let
- if token_number + 2 <= token_range:stop() then
- if tokens[token_number + 2][1] == CONTROL_SEQUENCE then -- followed by another control sequence
- if token_number + 3 <= token_range:stop() then
- if tokens[token_number + 3][1] == CONTROL_SEQUENCE then -- followed by another control sequence
- normalized_csname = "cs_gset_eq:NN" -- \global \let \csname \csname
- goto skip_decrement
- elseif tokens[token_number + 3][1] == CHARACTER then -- followed by a character
- if tokens[token_number + 3][2] == "=" then -- that is an equal sign
- if token_number + 4 <= token_range:stop() then
- if tokens[token_number + 4][1] == CONTROL_SEQUENCE then -- followed by another control sequence
- ignored_token_number = token_number + 3
- normalized_csname = "cs_gset_eq:NN" -- \global \let \csname = \csname
- goto skip_decrement
- end
+ end
+ elseif csname == "global" then -- \global
+ next_token_number = next_token_number + 1
+ assert(next_token_number == token_number + 2)
+ if token_number + 1 <= token_range:stop() then
+ if transformed_tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- followed by a control sequence
+ csname = transformed_tokens[token_number + 1][2]
+ if csname == "let" then -- \global \let
+ if token_number + 2 <= token_range:stop() then
+ if transformed_tokens[token_number + 2][1] == CONTROL_SEQUENCE then -- followed by another control sequence
+ if token_number + 3 <= token_range:stop() then
+ if transformed_tokens[token_number + 3][1] == CONTROL_SEQUENCE then -- followed by another control sequence
+ normalized_csname = "cs_gset_eq:NN" -- \global \let \csname \csname
+ goto skip_decrement
+ elseif transformed_tokens[token_number + 3][1] == CHARACTER then -- followed by a character
+ if transformed_tokens[token_number + 3][2] == "=" then -- that is an equal sign
+ if token_number + 4 <= token_range:stop() then
+ if transformed_tokens[token_number + 4][1] == CONTROL_SEQUENCE then -- followed by another control sequence
+ ignored_token_number = token_number + 3
+ normalized_csname = "cs_gset_eq:NN" -- \global \let \csname = \csname
+ goto skip_decrement
end
end
end
@@ -117,93 +234,193 @@
end
end
end
- elseif csname == "def" or csname == "gdef" or csname == "edef" or csname == "xdef" then -- \global \?def
- if token_number + 2 <= token_range:stop() then
- if tokens[token_number + 2][1] == CONTROL_SEQUENCE then -- followed by another control sequence
- if csname == "def" then -- \global \def \csname
- normalized_csname = "cs_gset:Npn"
- elseif csname == "gdef" then -- \global \gdef \csname
- normalized_csname = "cs_gset:Npn"
- elseif csname == "edef" then -- \global \edef \csname
- normalized_csname = "cs_gset:Npe"
- elseif csname == "xdef" then -- \global \xdef \csname
- normalized_csname = "cs_gset:Npx"
- else
- assert(false)
- end
- goto skip_decrement
+ end
+ elseif csname == "def" or csname == "gdef" or csname == "edef" or csname == "xdef" then -- \global \?def
+ if token_number + 2 <= token_range:stop() then
+ if transformed_tokens[token_number + 2][1] == CONTROL_SEQUENCE then -- followed by another control sequence
+ if csname == "def" then -- \global \def \csname
+ normalized_csname = "cs_gset:Npn"
+ elseif csname == "gdef" then -- \global \gdef \csname
+ normalized_csname = "cs_gset:Npn"
+ elseif csname == "edef" then -- \global \edef \csname
+ normalized_csname = "cs_gset:Npe"
+ elseif csname == "xdef" then -- \global \xdef \csname
+ normalized_csname = "cs_gset:Npx"
+ else
+ assert(false)
end
+ goto skip_decrement
end
end
end
end
- next_token_number = next_token_number - 1
- assert(next_token_number == token_number + 1)
- ::skip_decrement::
end
- return normalized_csname, next_token_number, ignored_token_number
+ next_token_number = next_token_number - 1
+ assert(next_token_number == token_number + 1)
+ ::skip_decrement::
end
+ return normalized_csname, next_token_number, ignored_token_number
+ end
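Summarized, normalize_csname maps the following non-expl3 commands onto expl3 equivalents (control sequences shown with hypothetical operands \a and \b):

    -- \directlua                         -> lua_now:e
    -- \let \a \b                         -> cs_set_eq:NN   (also \let \a = \b)
    -- \def \a, \gdef \a                  -> cs_set:Npn, cs_gset:Npn
    -- \edef \a, \xdef \a                 -> cs_set:Npe, cs_set:Npx
    -- \global \let \a \b                 -> cs_gset_eq:NN  (also \global \let \a = \b)
    -- \global \def \a, \global \gdef \a  -> cs_gset:Npn
    -- \global \edef \a, \global \xdef \a -> cs_gset:Npe, cs_gset:Npx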
- while token_number <= token_range:stop() do
- local token = tokens[token_number]
- local token_type, payload, _, byte_range = table.unpack(token)
- if token_type == CONTROL_SEQUENCE then -- a control sequence
- local original_csname = payload
- local csname, next_token_number, ignored_token_number = normalize_csname(original_csname)
- ::retry_control_sequence::
- local _, _, argument_specifiers = csname:find(":([^:]*)") -- try to extract a call
- if argument_specifiers ~= nil and lpeg.match(parsers.argument_specifiers, argument_specifiers) ~= nil then
- local arguments = {}
- local next_token, next_token_range
- local next_token_type, _, next_catcode, next_byte_range
- local next_grouping, parameter_text_start_token_number
- for argument_specifier in argument_specifiers:gmatch(".") do -- an expl3 control sequence, try to collect the arguments
- if lpeg.match(parsers.weird_argument_specifier, argument_specifier) then
- goto skip_other_token -- a "weird" argument specifier, skip the control sequence
- elseif lpeg.match(parsers.do_not_use_argument_specifier, argument_specifier) then
- goto skip_other_token -- a "do not use" argument specifier, skip the control sequence
+ while token_number <= transformed_token_range_end do
+ local token = transformed_tokens[token_number]
+ local token_type, payload, _, byte_range = table.unpack(token)
+ local next_token, next_next_token, next_token_range
+ if token_type == CONTROL_SEQUENCE then -- a control sequence
+ local original_csname = payload
+ local csname, next_token_number, ignored_token_number = normalize_csname(original_csname)
+ ::retry_control_sequence::
+ local _, _, argument_specifiers = csname:find(":([^:]*)") -- try to extract a call
+ if argument_specifiers ~= nil and lpeg.match(parsers.argument_specifiers, argument_specifiers) ~= nil then
+ local arguments = {}
+ local next_token_type, _, next_catcode, next_byte_range
+ local next_grouping, parameter_text_start_token_number
+ local num_parameters
+ local are_parameter_texts_valid = true
+ for argument_specifier in argument_specifiers:gmatch(".") do -- an expl3 control sequence, try to collect the arguments
+ if lpeg.match(parsers.weird_argument_specifier, argument_specifier) then
+ goto skip_other_token -- a "weird" argument specifier, skip the control sequence
+ elseif lpeg.match(parsers.do_not_use_argument_specifier, argument_specifier) then
+ goto skip_other_token -- a "do not use" argument specifier, skip the control sequence
+ end
+ ::check_token::
+ if next_token_number > transformed_token_range_end then -- missing argument (partial application?), skip all remaining tokens
+ if token_range:stop() == #tokens then
+ if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
+ csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
+ goto retry_control_sequence
+ else
+ issues:add('e301', 'end of expl3 part within function call', byte_range)
+ end
end
- ::check_token::
- if next_token_number > token_range:stop() then -- missing argument (partial application?), skip all remaining tokens
- if token_range:stop() == #tokens then
+ next_token_range = new_range(token_number, transformed_token_range_end, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = next_token_number
+ goto continue
+ end
+ next_token = transformed_tokens[next_token_number]
+ next_token_type, _, next_catcode, next_byte_range = table.unpack(next_token)
+ if ignored_token_number ~= nil and next_token_number == ignored_token_number then
+ next_token_number = next_token_number + 1
+ goto check_token
+ end
+ if lpeg.match(parsers.parameter_argument_specifier, argument_specifier) then
+ parameter_text_start_token_number = next_token_number -- a "TeX parameter" argument specifier, try to collect parameter text
+ while next_token_number <= transformed_token_range_end do
+ next_token = transformed_tokens[next_token_number]
+ next_token_type, _, next_catcode, next_byte_range = table.unpack(next_token)
+ if next_token_type == CHARACTER and next_catcode == 2 then -- end grouping, skip the control sequence
if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
goto retry_control_sequence
else
- issues:add('e301', 'end of expl3 part within function call', byte_range)
+ issues:add('e300', 'unexpected function call argument', next_byte_range)
+ goto skip_other_token
end
+ elseif next_token_type == CHARACTER and next_catcode == 1 then -- begin grouping, validate and record the parameter text
+ next_token_number = next_token_number - 1
+ next_token_range
+ = new_range(parameter_text_start_token_number, next_token_number, INCLUSIVE + MAYBE_EMPTY, #transformed_tokens)
+ num_parameters = count_parameters_in_parameter_text(next_token_range)
+ if num_parameters == nil then
+ are_parameter_texts_valid = false
+ end
+ table.insert(arguments, {argument_specifier, next_token_range, num_parameters})
+ break
end
- record_other_tokens(new_range(token_number, token_range:stop(), INCLUSIVE, #tokens))
+ next_token_number = next_token_number + 1
+ end
+ if next_token_number > transformed_token_range_end then -- missing begin grouping (partial application?)
+ if token_range:stop() == #tokens then -- skip all remaining tokens
+ if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
+ csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
+ goto retry_control_sequence
+ else
+ issues:add('e301', 'end of expl3 part within function call', next_byte_range)
+ end
+ end
+ next_token_range = new_range(token_number, transformed_token_range_end, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
token_number = next_token_number
goto continue
end
- next_token = tokens[next_token_number]
- next_token_type, _, next_catcode, next_byte_range = table.unpack(next_token)
- if ignored_token_number ~= nil and next_token_number == ignored_token_number then
- next_token_number = next_token_number + 1
- goto check_token
- end
- if lpeg.match(parsers.parameter_argument_specifier, argument_specifier) then
- parameter_text_start_token_number = next_token_number -- a "TeX parameter" argument specifier, try to collect parameter text
- while next_token_number <= token_range:stop() do
- next_token = tokens[next_token_number]
- next_token_type, _, next_catcode, next_byte_range = table.unpack(next_token)
- if next_token_type == CHARACTER and next_catcode == 2 then -- end grouping, skip the control sequence
+ elseif lpeg.match(parsers.N_type_argument_specifier, argument_specifier) then -- an N-type argument specifier
+ if next_token_type == CHARACTER and next_catcode == 1 then -- begin grouping, try to collect the balanced text
+ next_grouping = groupings[map_back(next_token_number)]
+ assert(next_grouping ~= nil)
+ assert(map_forward(next_grouping.start) == next_token_number)
+ if next_grouping.stop == nil then -- an unclosed grouping, skip the control sequence
+ if token_range:stop() == #tokens then
if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
goto retry_control_sequence
else
+ issues:add('e301', 'end of expl3 part within function call', next_byte_range)
+ end
+ end
+ goto skip_other_token
+ else -- a balanced text
+ next_token_range = new_range(
+ map_forward(next_grouping.start + 1),
+ map_forward(next_grouping.stop - 1),
+ INCLUSIVE + MAYBE_EMPTY,
+ #transformed_tokens,
+ map_back,
+ #tokens
+ )
+ if #next_token_range == 1 then -- a single token, record it
+ issues:add('w303', 'braced N-type function call argument', next_byte_range)
+ table.insert(arguments, {argument_specifier, next_token_range})
+ next_token_number = map_forward(next_grouping.stop)
+ elseif #next_token_range == 2 and -- two tokens
+ tokens[next_token_range:start()][1] == CHARACTER and tokens[next_token_range:start()][3] == 6 and -- a parameter
+ (tokens[next_token_range:stop()][1] == ARGUMENT or -- followed by a function call argument (could be a digit)
+ tokens[next_token_range:stop()][1] == CHARACTER and -- or an actual digit (unrecognized parameter/replacement text?)
+ lpeg.match(parsers.decimal_digit, tokens[next_token_range:stop()][2])) then -- skip all tokens
+ next_token_range
+ = new_range(token_number, map_forward(next_grouping.stop), INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = map_forward(next_grouping.stop + 1)
+ goto continue
+ else -- no token / more than one token, skip the control sequence
+ if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
+ csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
+ goto retry_control_sequence
+ else
issues:add('e300', 'unexpected function call argument', next_byte_range)
goto skip_other_token
end
- elseif next_token_type == CHARACTER and next_catcode == 1 then -- begin grouping, record the parameter text
- next_token_number = next_token_number - 1
- table.insert(arguments, new_range(parameter_text_start_token_number, next_token_number, INCLUSIVE + MAYBE_EMPTY, #tokens))
- break
end
- next_token_number = next_token_number + 1
end
- if next_token_number > token_range:stop() then -- missing begin grouping (partial application?), skip all remaining tokens
+ elseif next_token_type == CHARACTER and next_catcode == 2 then -- end grouping (partial application?), skip all tokens
+ next_token_range = new_range(token_number, next_token_number, EXCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = next_token_number
+ goto continue
+ else
+ if next_token_type == CHARACTER and next_catcode == 6 then -- a parameter
+ if next_token_number + 1 <= transformed_token_range_end then -- followed by one other token
+ next_next_token = transformed_tokens[next_token_number + 1]
+ if next_next_token[1] == ARGUMENT or -- that is either a function call argument (could be a digit)
+ next_next_token[1] == CHARACTER and -- or an actual digit (unrecognized parameter/replacement text?)
+ lpeg.match(parsers.decimal_digit, next_next_token[2]) then -- skip all tokens
+ next_token_range = new_range(token_number, next_token_number + 1, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = next_token_number + 2
+ goto continue
+ end
+ end
+ end
+ -- an N-type argument, record it
+ next_token_range = new_range(next_token_number, next_token_number, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ table.insert(arguments, {argument_specifier, next_token_range})
+ end
+ elseif lpeg.match(parsers.n_type_argument_specifier, argument_specifier) then -- an n-type argument specifier
+ if next_token_type == CHARACTER and next_catcode == 1 then -- begin grouping, try to collect the balanced text
+ next_grouping = groupings[map_back(next_token_number)]
+ assert(next_grouping ~= nil)
+ assert(map_forward(next_grouping.start) == next_token_number)
+ if next_grouping.stop == nil then -- an unclosed grouping, skip the control sequence
if token_range:stop() == #tokens then
if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
@@ -212,146 +429,98 @@
issues:add('e301', 'end of expl3 part within function call', next_byte_range)
end
end
- record_other_tokens(new_range(token_number, token_range:stop(), INCLUSIVE, #tokens))
- token_number = next_token_number
- goto continue
+ goto skip_other_token
+ else -- a balanced text, record it
+ next_token_range = new_range(
+ map_forward(next_grouping.start + 1),
+ map_forward(next_grouping.stop - 1),
+ INCLUSIVE + MAYBE_EMPTY,
+ #transformed_tokens,
+ map_back,
+ #tokens
+ )
+ table.insert(arguments, {argument_specifier, next_token_range})
+ next_token_number = map_forward(next_grouping.stop)
end
- elseif lpeg.match(parsers.N_type_argument_specifier, argument_specifier) then -- an N-type argument specifier
- if next_token_type == CHARACTER and next_catcode == 1 then -- begin grouping, try to collect the balanced text
- next_grouping = groupings[next_token_number]
- assert(next_grouping ~= nil)
- assert(next_grouping.start == next_token_number)
- if next_grouping.stop == nil then -- an unclosed grouping, skip the control sequence
- if token_range:stop() == #tokens then
- if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
- csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
- goto retry_control_sequence
- else
- issues:add('e301', 'end of expl3 part within function call', next_byte_range)
- end
- end
- goto skip_other_token
- else -- a balanced text
- next_token_range = new_range(next_grouping.start + 1, next_grouping.stop - 1, INCLUSIVE + MAYBE_EMPTY, #tokens)
- if #next_token_range == 1 then -- a single token, record it
- issues:add('w303', 'braced N-type function call argument', next_byte_range)
- table.insert(arguments, next_token_range)
- next_token_number = next_grouping.stop
- elseif #next_token_range == 2 and -- two tokens
- tokens[next_token_range:start()][1] == CHARACTER and tokens[next_token_range:start()][3] == 6 and -- a parameter
- tokens[next_token_range:stop()][1] == CHARACTER and -- followed by a digit (unrecognized parameter/replacement text?)
- lpeg.match(parsers.decimal_digit, tokens[next_token_range:stop()][2]) then
- record_other_tokens(new_range(token_number, next_grouping.stop, INCLUSIVE, #tokens))
- token_number = next_grouping.stop + 1
+ elseif next_token_type == CHARACTER and next_catcode == 2 then -- end grouping (partial application?), skip all tokens
+ next_token_range = new_range(token_number, next_token_number, EXCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = next_token_number
+ goto continue
+ else -- not begin grouping
+ if next_token_type == CHARACTER and next_catcode == 6 then -- a parameter
+ if next_token_number + 1 <= transformed_token_range_end then -- followed by one other token
+ next_next_token = transformed_tokens[next_token_number + 1]
+ if next_next_token[1] == ARGUMENT or -- that is either a function call argument (could be a digit)
+ next_next_token[1] == CHARACTER and -- or an actual digit (unrecognized parameter/replacement text?)
+ lpeg.match(parsers.decimal_digit, next_next_token[2]) then -- skip all tokens
+ next_token_range = new_range(token_number, next_token_number + 1, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = next_token_number + 2
goto continue
- else -- no token / more than one token, skip the control sequence
- if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
- csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
- goto retry_control_sequence
- else
- issues:add('e300', 'unexpected function call argument', next_byte_range)
- goto skip_other_token
- end
end
end
- elseif next_token_type == CHARACTER and next_catcode == 2 then -- end grouping (partial application?), skip all tokens
- record_other_tokens(new_range(token_number, next_token_number, EXCLUSIVE, #tokens))
- token_number = next_token_number
- goto continue
- else
- if next_token_type == CHARACTER and next_catcode == 6 then -- a parameter
- if next_token_number + 1 <= token_range:stop() then -- followed by one other token
- if tokens[next_token_number + 1][1] == CHARACTER and -- that is a digit (unrecognized parameter/replacement text?)
- lpeg.match(parsers.decimal_digit, tokens[next_token_number + 1][2]) then -- skip all tokens
- record_other_tokens(new_range(token_number, next_token_number + 1, INCLUSIVE, #tokens))
- token_number = next_token_number + 2
- goto continue
- end
- end
- end
- -- an N-type argument, record it
- table.insert(arguments, new_range(next_token_number, next_token_number, INCLUSIVE, #tokens))
end
- elseif lpeg.match(parsers.n_type_argument_specifier, argument_specifier) then -- an n-type argument specifier
- if next_token_type == CHARACTER and next_catcode == 1 then -- begin grouping, try to collect the balanced text
- next_grouping = groupings[next_token_number]
- assert(next_grouping ~= nil)
- assert(next_grouping.start == next_token_number)
- if next_grouping.stop == nil then -- an unclosed grouping, skip the control sequence
- if token_range:stop() == #tokens then
- if csname ~= original_csname then -- before recording an error, retry without trying to understand non-expl3
- csname, next_token_number, ignored_token_number = original_csname, token_number + 1, nil
- goto retry_control_sequence
- else
- issues:add('e301', 'end of expl3 part within function call', next_byte_range)
- end
- end
- goto skip_other_token
- else -- a balanced text, record it
- table.insert(arguments, new_range(next_grouping.start + 1, next_grouping.stop - 1, INCLUSIVE + MAYBE_EMPTY, #tokens))
- next_token_number = next_grouping.stop
- end
- elseif next_token_type == CHARACTER and next_catcode == 2 then -- end grouping (partial application?), skip all tokens
- record_other_tokens(new_range(token_number, next_token_number, EXCLUSIVE, #tokens))
- token_number = next_token_number
- goto continue
- else -- not begin grouping
- if next_token_type == CHARACTER and next_catcode == 6 then -- a parameter
- if next_token_number + 1 <= token_range:stop() then -- followed by one other token
- if tokens[next_token_number + 1][1] == CHARACTER and -- that is a digit (unrecognized parameter/replacement text?)
- lpeg.match(parsers.decimal_digit, tokens[next_token_number + 1][2]) then -- skip all tokens
- record_other_tokens(new_range(token_number, next_token_number + 1, INCLUSIVE, #tokens))
- token_number = next_token_number + 2
- goto continue
- end
- end
- end
- -- an unbraced n-type argument, record it
- issues:add('w302', 'unbraced n-type function call argument', next_byte_range)
- table.insert(arguments, new_range(next_token_number, next_token_number, INCLUSIVE, #tokens))
- end
- else
- error('Unexpected argument specifier "' .. argument_specifier .. '"')
+ -- an unbraced n-type argument, record it
+ issues:add('w302', 'unbraced n-type function call argument', next_byte_range)
+ next_token_range = new_range(next_token_number, next_token_number, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ table.insert(arguments, {argument_specifier, next_token_range})
end
- next_token_number = next_token_number + 1
+ else
+ error('Unexpected argument specifier "' .. argument_specifier .. '"')
end
- table.insert(calls, {CALL, new_range(token_number, next_token_number, EXCLUSIVE, #tokens), csname, arguments})
- token_number = next_token_number
- goto continue
- else -- a non-expl3 control sequence, skip it
- goto skip_other_token
+ next_token_number = next_token_number + 1
end
- elseif token_type == CHARACTER then -- an ordinary character
- if payload == "=" then -- an equal sign
- if token_number + 2 <= token_range:stop() then -- followed by two other tokens
- if tokens[token_number + 1][1] == CONTROL_SEQUENCE then -- the first being a control sequence
- if tokens[token_number + 2][1] == CHARACTER and tokens[token_number + 2][2] == "," then -- and the second being a comma
- -- (probably l3keys definition?), skip all three tokens
- record_other_tokens(new_range(token_number, token_number + 2, INCLUSIVE, #tokens))
- token_number = token_number + 3
- goto continue
- end
+ next_token_range = new_range(token_number, next_token_number, EXCLUSIVE, #transformed_tokens, map_back, #tokens)
+ if are_parameter_texts_valid then -- if all "TeX parameter" arguments are valid, record the call
+ table.insert(calls, {CALL, next_token_range, csname, arguments})
+ else -- otherwise, skip all tokens from the call
+ record_other_tokens(next_token_range)
+ end
+ token_number = next_token_number
+ goto continue
+ else -- a non-expl3 control sequence, skip it
+ goto skip_other_token
+ end
+ elseif token_type == CHARACTER then -- an ordinary character
+ if payload == "=" then -- an equal sign
+ if token_number + 2 <= transformed_token_range_end then -- followed by two other tokens
+ next_token = transformed_tokens[token_number + 1]
+ if next_token[1] == CONTROL_SEQUENCE then -- the first being a control sequence
+ next_next_token = transformed_tokens[token_number + 2]
+ if next_next_token[1] == CHARACTER and next_next_token[2] == "," then -- and the second being a comma
+ -- (probably l3keys definition?), skip all three tokens
+ next_token_range = new_range(token_number, token_number + 2, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = token_number + 3
+ goto continue
end
end
end
- -- an ordinary character, skip it
- goto skip_other_token
- else
- error('Unexpected token type "' .. token_type .. '"')
end
- ::skip_other_token::
- record_other_tokens(new_range(token_number, token_number, INCLUSIVE, #tokens))
- token_number = token_number + 1
- ::continue::
+ -- an ordinary character, skip it
+ goto skip_other_token
+ elseif token_type == ARGUMENT then -- a function call argument, skip it
+ goto skip_other_token
+ else
+ error('Unexpected token type "' .. token_type .. '"')
end
- return calls
+ ::skip_other_token::
+ next_token_range = new_range(token_number, token_number, INCLUSIVE, #transformed_tokens, map_back, #tokens)
+ record_other_tokens(next_token_range)
+ token_number = token_number + 1
+ ::continue::
end
+ return calls
+end
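Each element of the returned `calls` array has one of two shapes, matching the renamed call_types above:

    -- {CALL, token_range, csname, arguments}  -- an expl3 call; every argument is
    --                                         -- {argument_specifier, token_range[, num_parameters]}
    -- {OTHER_TOKENS, token_range}             -- a block of other (unrecognized) tokens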
+-- Convert the tokens to a tree of top-level function calls and register any issues.
+local function syntactic_analysis(pathname, content, issues, results, options) -- luacheck: ignore pathname content options
local calls = {}
for part_number, part_tokens in ipairs(results.tokens) do
local part_groupings = results.groupings[part_number]
local part_token_range = new_range(1, #part_tokens, INCLUSIVE, #part_tokens)
- local part_calls = get_calls(part_tokens, part_token_range, part_groupings)
+ local part_calls = get_calls(part_tokens, part_tokens, part_token_range, identity, identity, issues, part_groupings)
table.insert(calls, part_calls)
end
@@ -360,6 +529,8 @@
end
return {
+ get_calls = get_calls,
process = syntactic_analysis,
- call_types = call_types
+ call_types = call_types,
+ transform_replacement_text_tokens = transform_replacement_text_tokens,
}
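A sketch of how the semantic-analysis step might pull in the newly exported helpers; the exact require style is an assumption:

    local syntactic_analysis = require("explcheck-syntactic-analysis")
    local get_calls = syntactic_analysis.get_calls
    local transform_replacement_text_tokens = syntactic_analysis.transform_replacement_text_tokens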
Modified: trunk/Master/texmf-dist/scripts/expltools/explcheck-utils.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/expltools/explcheck-utils.lua 2025-04-24 23:42:35 UTC (rev 75016)
+++ trunk/Master/texmf-dist/scripts/expltools/explcheck-utils.lua 2025-04-25 21:14:22 UTC (rev 75017)
@@ -35,9 +35,15 @@
end
end
+--- Return all arguments unchanged; used as a no-op map-back and map-forward function.
+local function identity(...)
+ return ...
+end
+
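A quick usage sketch of the helper, matching how the syntactic analysis passes it as a no-op map in both directions:

    local identity = require("explcheck-utils").identity
    local first, second = identity(1, "a")
    assert(first == 1 and second == "a")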
return {
convert_byte_to_line_and_column = convert_byte_to_line_and_column,
get_basename = get_basename,
get_parent = get_parent,
get_suffix = get_suffix,
+ identity = identity,
}