[latex3-commits] [git/LaTeX3-latex3-luaotfload] dev: Decode invalid UTF-8 as MacRoman (9aa4b4d)
Marcel Fabian Krüger
tex at 2krueger.de
Tue Aug 20 14:47:46 CEST 2019
Repository : https://github.com/latex3/luaotfload
On branch : dev
Link : https://github.com/latex3/luaotfload/commit/9aa4b4d7dffd02382b30dea232b6d5d0a0a0e682
>---------------------------------------------------------------
commit 9aa4b4d7dffd02382b30dea232b6d5d0a0a0e682
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date: Tue Aug 20 14:47:46 2019 +0200
Decode invalid UTF-8 as MacRoman
>---------------------------------------------------------------
9aa4b4d7dffd02382b30dea232b6d5d0a0a0e682
src/luaotfload-database.lua | 66 ++++++++++++++++------
.../latex-dev/luaotfload/luaotfload-database.lua | 66 ++++++++++++++++------
2 files changed, 98 insertions(+), 34 deletions(-)
diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua
index 67bc0a7..1092bd5 100644
--- a/src/luaotfload-database.lua
+++ b/src/luaotfload-database.lua
@@ -239,15 +239,53 @@ end
--- string -> string
-local invalidchars = "[^%a%d]"
-
+local macroman2utf8 do
+ local mapping = {
+ [0x80] = 0x00C4, [0x81] = 0x00C5, [0x82] = 0x00C7, [0x83] = 0x00C9,
+ [0x84] = 0x00D1, [0x85] = 0x00D6, [0x86] = 0x00DC, [0x87] = 0x00E1,
+ [0x88] = 0x00E0, [0x89] = 0x00E2, [0x8A] = 0x00E4, [0x8B] = 0x00E3,
+ [0x8C] = 0x00E5, [0x8D] = 0x00E7, [0x8E] = 0x00E9, [0x8F] = 0x00E8,
+ [0x90] = 0x00EA, [0x91] = 0x00EB, [0x92] = 0x00ED, [0x93] = 0x00EC,
+ [0x94] = 0x00EE, [0x95] = 0x00EF, [0x96] = 0x00F1, [0x97] = 0x00F3,
+ [0x98] = 0x00F2, [0x99] = 0x00F4, [0x9A] = 0x00F6, [0x9B] = 0x00F5,
+ [0x9C] = 0x00FA, [0x9D] = 0x00F9, [0x9E] = 0x00FB, [0x9F] = 0x00FC,
+ [0xA0] = 0x2020, [0xA1] = 0x00B0, [0xA2] = 0x00A2, [0xA3] = 0x00A3,
+ [0xA4] = 0x00A7, [0xA5] = 0x2022, [0xA6] = 0x00B6, [0xA7] = 0x00DF,
+ [0xA8] = 0x00AE, [0xA9] = 0x00A9, [0xAA] = 0x2122, [0xAB] = 0x00B4,
+ [0xAC] = 0x00A8, [0xAD] = 0x2260, [0xAE] = 0x00C6, [0xAF] = 0x00D8,
+ [0xB0] = 0x221E, [0xB1] = 0x00B1, [0xB2] = 0x2264, [0xB3] = 0x2265,
+ [0xB4] = 0x00A5, [0xB5] = 0x00B5, [0xB6] = 0x2202, [0xB7] = 0x2211,
+ [0xB8] = 0x220F, [0xB9] = 0x03C0, [0xBA] = 0x222B, [0xBB] = 0x00AA,
+ [0xBC] = 0x00BA, [0xBD] = 0x03A9, [0xBE] = 0x00E6, [0xBF] = 0x00F8,
+ [0xC0] = 0x00BF, [0xC1] = 0x00A1, [0xC2] = 0x00AC, [0xC3] = 0x221A,
+ [0xC4] = 0x0192, [0xC5] = 0x2248, [0xC6] = 0x2206, [0xC7] = 0x00AB,
+ [0xC8] = 0x00BB, [0xC9] = 0x2026, [0xCA] = 0x00A0, [0xCB] = 0x00C0,
+ [0xCC] = 0x00C3, [0xCD] = 0x00D5, [0xCE] = 0x0152, [0xCF] = 0x0153,
+ [0xD0] = 0x2013, [0xD1] = 0x2014, [0xD2] = 0x201C, [0xD3] = 0x201D,
+ [0xD4] = 0x2018, [0xD5] = 0x2019, [0xD6] = 0x00F7, [0xD7] = 0x25CA,
+ [0xD8] = 0x00FF, [0xD9] = 0x0178, [0xDA] = 0x2044, [0xDB] = 0x20AC,
+ [0xDC] = 0x2039, [0xDD] = 0x203A, [0xDE] = 0xFB01, [0xDF] = 0xFB02,
+ [0xE0] = 0x2021, [0xE1] = 0x00B7, [0xE2] = 0x201A, [0xE3] = 0x201E,
+ [0xE4] = 0x2030, [0xE5] = 0x00C2, [0xE6] = 0x00CA, [0xE7] = 0x00C1,
+ [0xE8] = 0x00CB, [0xE9] = 0x00C8, [0xEA] = 0x00CD, [0xEB] = 0x00CE,
+ [0xEC] = 0x00CF, [0xED] = 0x00CC, [0xEE] = 0x00D3, [0xEF] = 0x00D4,
+ [0xF0] = 0xF8FF, [0xF1] = 0x00D2, [0xF2] = 0x00DA, [0xF3] = 0x00DB,
+ [0xF4] = 0x00D9, [0xF5] = 0x0131, [0xF6] = 0x02C6, [0xF7] = 0x02DC,
+ [0xF8] = 0x00AF, [0xF9] = 0x02D8, [0xFA] = 0x02D9, [0xFB] = 0x02DA,
+ [0xFC] = 0x00B8, [0xFD] = 0x02DD, [0xFE] = 0x02DB, [0xFF] = 0x02C7,
+ }
+ function macroman2utf8(s)
+ local bytes = {string.byte(s, 1, -1)}
+ for i=1,#bytes do
+ bytes[i] = mapping[bytes[i]] or bytes[i]
+ end
+ return utf8.char(table.unpack(bytes))
+ end
+end
local sanitize_fontname = function (str)
if str ~= nil then
- if utf8len(str) then
- str = alphnum_only(casefold(str, true))
- else
- str = stringgsub(stringlower(str), invalidchars, "")
- end
+ str = utf8len(str) and str or macroman2utf8(str)
+ str = alphnum_only(casefold(str, true))
return str
end
return nil
@@ -258,19 +296,13 @@ local sanitize_fontnames = function (rawnames)
for category, namedata in next, rawnames do
if type (namedata) == "string" then
- if utf8len(namedata) then
- result [category] = alphnum_only(casefold(namedata, true))
- else
- result [category] = stringgsub(stringlower(namedata), invalidchars, "")
- end
+ namedata = utf8len(namedata) and namedata or macroman2utf8(namedata)
+ result [category] = alphnum_only(casefold(namedata, true))
else
local target = { }
for field, name in next, namedata do
- if utf8len(name) then
- target [field] = alphnum_only(casefold(name, true))
- else
- target [field] = stringgsub(stringlower(name), invalidchars, "")
- end
+ name = utf8len(name) and name or macroman2utf8(name)
+ target [field] = alphnum_only(casefold(name, true))
end
result [category] = target
end
diff --git a/texmf/tex/latex-dev/luaotfload/luaotfload-database.lua b/texmf/tex/latex-dev/luaotfload/luaotfload-database.lua
index 67bc0a7..1092bd5 100644
--- a/texmf/tex/latex-dev/luaotfload/luaotfload-database.lua
+++ b/texmf/tex/latex-dev/luaotfload/luaotfload-database.lua
@@ -239,15 +239,53 @@ end
--- string -> string
-local invalidchars = "[^%a%d]"
-
+local macroman2utf8 do
+ local mapping = {
+ [0x80] = 0x00C4, [0x81] = 0x00C5, [0x82] = 0x00C7, [0x83] = 0x00C9,
+ [0x84] = 0x00D1, [0x85] = 0x00D6, [0x86] = 0x00DC, [0x87] = 0x00E1,
+ [0x88] = 0x00E0, [0x89] = 0x00E2, [0x8A] = 0x00E4, [0x8B] = 0x00E3,
+ [0x8C] = 0x00E5, [0x8D] = 0x00E7, [0x8E] = 0x00E9, [0x8F] = 0x00E8,
+ [0x90] = 0x00EA, [0x91] = 0x00EB, [0x92] = 0x00ED, [0x93] = 0x00EC,
+ [0x94] = 0x00EE, [0x95] = 0x00EF, [0x96] = 0x00F1, [0x97] = 0x00F3,
+ [0x98] = 0x00F2, [0x99] = 0x00F4, [0x9A] = 0x00F6, [0x9B] = 0x00F5,
+ [0x9C] = 0x00FA, [0x9D] = 0x00F9, [0x9E] = 0x00FB, [0x9F] = 0x00FC,
+ [0xA0] = 0x2020, [0xA1] = 0x00B0, [0xA2] = 0x00A2, [0xA3] = 0x00A3,
+ [0xA4] = 0x00A7, [0xA5] = 0x2022, [0xA6] = 0x00B6, [0xA7] = 0x00DF,
+ [0xA8] = 0x00AE, [0xA9] = 0x00A9, [0xAA] = 0x2122, [0xAB] = 0x00B4,
+ [0xAC] = 0x00A8, [0xAD] = 0x2260, [0xAE] = 0x00C6, [0xAF] = 0x00D8,
+ [0xB0] = 0x221E, [0xB1] = 0x00B1, [0xB2] = 0x2264, [0xB3] = 0x2265,
+ [0xB4] = 0x00A5, [0xB5] = 0x00B5, [0xB6] = 0x2202, [0xB7] = 0x2211,
+ [0xB8] = 0x220F, [0xB9] = 0x03C0, [0xBA] = 0x222B, [0xBB] = 0x00AA,
+ [0xBC] = 0x00BA, [0xBD] = 0x03A9, [0xBE] = 0x00E6, [0xBF] = 0x00F8,
+ [0xC0] = 0x00BF, [0xC1] = 0x00A1, [0xC2] = 0x00AC, [0xC3] = 0x221A,
+ [0xC4] = 0x0192, [0xC5] = 0x2248, [0xC6] = 0x2206, [0xC7] = 0x00AB,
+ [0xC8] = 0x00BB, [0xC9] = 0x2026, [0xCA] = 0x00A0, [0xCB] = 0x00C0,
+ [0xCC] = 0x00C3, [0xCD] = 0x00D5, [0xCE] = 0x0152, [0xCF] = 0x0153,
+ [0xD0] = 0x2013, [0xD1] = 0x2014, [0xD2] = 0x201C, [0xD3] = 0x201D,
+ [0xD4] = 0x2018, [0xD5] = 0x2019, [0xD6] = 0x00F7, [0xD7] = 0x25CA,
+ [0xD8] = 0x00FF, [0xD9] = 0x0178, [0xDA] = 0x2044, [0xDB] = 0x20AC,
+ [0xDC] = 0x2039, [0xDD] = 0x203A, [0xDE] = 0xFB01, [0xDF] = 0xFB02,
+ [0xE0] = 0x2021, [0xE1] = 0x00B7, [0xE2] = 0x201A, [0xE3] = 0x201E,
+ [0xE4] = 0x2030, [0xE5] = 0x00C2, [0xE6] = 0x00CA, [0xE7] = 0x00C1,
+ [0xE8] = 0x00CB, [0xE9] = 0x00C8, [0xEA] = 0x00CD, [0xEB] = 0x00CE,
+ [0xEC] = 0x00CF, [0xED] = 0x00CC, [0xEE] = 0x00D3, [0xEF] = 0x00D4,
+ [0xF0] = 0xF8FF, [0xF1] = 0x00D2, [0xF2] = 0x00DA, [0xF3] = 0x00DB,
+ [0xF4] = 0x00D9, [0xF5] = 0x0131, [0xF6] = 0x02C6, [0xF7] = 0x02DC,
+ [0xF8] = 0x00AF, [0xF9] = 0x02D8, [0xFA] = 0x02D9, [0xFB] = 0x02DA,
+ [0xFC] = 0x00B8, [0xFD] = 0x02DD, [0xFE] = 0x02DB, [0xFF] = 0x02C7,
+ }
+ function macroman2utf8(s)
+ local bytes = {string.byte(s, 1, -1)}
+ for i=1,#bytes do
+ bytes[i] = mapping[bytes[i]] or bytes[i]
+ end
+ return utf8.char(table.unpack(bytes))
+ end
+end
local sanitize_fontname = function (str)
if str ~= nil then
- if utf8len(str) then
- str = alphnum_only(casefold(str, true))
- else
- str = stringgsub(stringlower(str), invalidchars, "")
- end
+ str = utf8len(str) and str or macroman2utf8(str)
+ str = alphnum_only(casefold(str, true))
return str
end
return nil
@@ -258,19 +296,13 @@ local sanitize_fontnames = function (rawnames)
for category, namedata in next, rawnames do
if type (namedata) == "string" then
- if utf8len(namedata) then
- result [category] = alphnum_only(casefold(namedata, true))
- else
- result [category] = stringgsub(stringlower(namedata), invalidchars, "")
- end
+ namedata = utf8len(namedata) and namedata or macroman2utf8(namedata)
+ result [category] = alphnum_only(casefold(namedata, true))
else
local target = { }
for field, name in next, namedata do
- if utf8len(name) then
- target [field] = alphnum_only(casefold(name, true))
- else
- target [field] = stringgsub(stringlower(name), invalidchars, "")
- end
+ name = utf8len(name) and name or macroman2utf8(name)
+ target [field] = alphnum_only(casefold(name, true))
end
result [category] = target
end
More information about the latex3-commits mailing list