It's like string.byte(), but for Unicode: it returns the code point of the first UTF-8 encoded character in the string:
-- Returns the payload (low six bits) of a UTF-8 continuation byte, or nil
-- when the byte is missing or is not of the form 10xxxxxx (128..191).
local function utf8Continuation(b)
  if b ~= nil and b >= 128 and b < 192 then
    return b - 128
  end
  return nil
end

--- Decode the first UTF-8 encoded character of a string.
-- Works like string.byte(), but returns a Unicode code point instead of a
-- single byte value.
-- @param char string whose first 1-4 bytes hold one UTF-8 encoded character
-- @return the code point as a number, or nil when the string is empty, the
--   sequence is truncated or malformed (bad lead/continuation byte, overlong
--   form, UTF-16 surrogate, value above U+10FFFF), or the first byte is
--   below 20
function utf8Byte(char)
  local b1, b2, b3, b4 = char:byte(1, 4)

  -- Empty string: there is no byte to decode.
  if b1 == nil then
    return nil
  end

  -- NOTE: lead bytes below 20 (a subset of the ASCII control characters) are
  -- reported as nil; this threshold is kept for backward compatibility.
  if b1 < 20 then
    return nil
  elseif b1 < 128 then
    -- Single-byte (ASCII) character: the byte is the code point.
    return b1
  elseif b1 < 194 or b1 > 244 then
    -- 128..191 are stray continuation bytes, 192..193 can only start an
    -- overlong encoding, and 245..255 never occur in UTF-8.
    return nil
  end

  local c2 = utf8Continuation(b2)
  if c2 == nil then
    return nil
  end
  if b1 < 224 then
    -- Two-byte sequence: 110xxxxx 10xxxxxx
    return (b1 - 192) * 64 + c2
  end

  local c3 = utf8Continuation(b3)
  if c3 == nil then
    return nil
  end
  if b1 < 240 then
    -- Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
    local cp = (b1 - 224) * 4096 + c2 * 64 + c3
    -- Reject overlong forms and the UTF-16 surrogate range.
    if cp < 0x800 or (cp >= 0xD800 and cp <= 0xDFFF) then
      return nil
    end
    return cp
  end

  local c4 = utf8Continuation(b4)
  if c4 == nil then
    return nil
  end
  -- Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  local cp = (b1 - 240) * 262144 + c2 * 4096 + c3 * 64 + c4
  -- Reject overlong forms and values beyond the Unicode range.
  if cp < 0x10000 or cp > 0x10FFFF then
    return nil
  end
  return cp
end
Example:
-- Sample characters covering 1-, 2-, 3- and 4-byte UTF-8 sequences.
-- The tenth entry is the single character U+FB00 (LATIN SMALL LIGATURE FF),
-- written as byte escapes so that it cannot be mistaken for, or normalized
-- into, the two ASCII letters "ff".
local unicodeChars = {"A", "~", "¡", "ÿ", "Ā", "Ȁ", "Ф", "ૐ", "⼈", "\239\172\128", "𐌸"}

-- Print each character's code point next to the character itself.
for _, uChar in ipairs(unicodeChars) do
  local index = utf8Byte(uChar)
  print(index, uChar)
end
Result:
65 A
126 ~
161 ¡
255 ÿ
256 Ā
512 Ȁ
1060 Ф
2768 ૐ
12040 ⼈
64256 ﬀ
66360 𐌸
For example, "å" is encoded as 195, 165 (two bytes); this can be obtained with print(string.byte("å", 1, -1)).