-- Modul:Homokozó/TG/StringFunctions
-- Homokozó/TG/StringFunctions[mi ez?] • [dokumentáció: mutat, ] • [tesztek: létrehozás]
-- basic string manipulation functions
local StringFunctions = {}
--- Strips leading and trailing whitespace from a string.
-- The argument may be a plain string, or a table carrying an `args` field, in
-- which case `args[1]` is trimmed instead (presumably a Scribunto frame when
-- invoked from wikitext -- confirm against callers).
-- @param str string, table with an `args` field, or nil
-- @return the trimmed string, or nil when the value to trim is nil
function StringFunctions.trim(str)
    -- Only a table can carry `args`. Indexing a nil argument here used to
    -- raise an error before the nil check below could ever run.
    if type(str) == 'table' and str.args then
        str = str.args[1]
    end
    if str == nil then
        return nil
    end
    -- The outer parentheses truncate gsub's (string, count) result to the
    -- string alone.
    return (str:gsub("^%s*(.-)%s*$", "%1"))
end
--- Splits a string into (Unicode) characters.
-- Returns an iterator function; each call yields the next UTF-8 character of
-- `str` as a string, and nil once the input is exhausted.
-- When a lead byte cannot start a UTF-8 sequence (a continuation byte, or more
-- than six leading 1 bits), the iterator yields a single marker of the form
-- `<error(POSITION:BYTE)>` and returns nil on every later call.
-- Continuation bytes are not validated, and legacy 5- and 6-byte forms are
-- accepted, so behavior is otherwise undefined for invalid UTF-8.
-- @param str the string to iterate over
-- @return iterator function yielding one character per call
function StringFunctions.split(str)
    -- `failed` replaces the former local named `error`, which shadowed the
    -- builtin of the same name.
    local i, failed = 1, false
    return function()
        if failed then
            return nil
        end
        local byte = str:byte(i)
        if byte == nil then
            -- past the end of the string
            return nil
        end
        -- Determine the number of 1 bits before the first 0 in byte by
        -- peeling off the high bits one at a time.
        local leadBits, bitValue, remainder = 0, 128, byte
        while bitValue <= remainder and bitValue > 1 do
            leadBits = leadBits + 1
            remainder = remainder - bitValue
            bitValue = bitValue / 2
        end
        local length -- length in bytes of the next UTF-8 character
        if leadBits == 0 then -- ASCII character
            length = 1
        elseif leadBits == 1 or leadBits > 6 then -- not valid UTF-8
            failed = true
            -- `..` is Lua's concatenation operator; the previous `+` attempted
            -- arithmetic on the strings and raised a runtime error.
            return '<error(' .. i .. ':' .. byte .. ')>'
        else
            length = leadBits
        end
        local chr = str:sub(i, i + length - 1)
        i = i + length
        return chr
    end
end
--- UTF-8 aware counterpart of string:len.
-- Counts how many items StringFunctions.split yields for `str`, so the result
-- is a number of characters rather than a number of bytes.
-- @param str the string to measure
-- @return the number of items produced by the split iterator
function StringFunctions.len(str)
    local count = 0
    local next_char = StringFunctions.split(str)
    -- Drive the iterator by hand; it signals exhaustion by returning nil.
    while next_char() ~= nil do
        count = count + 1
    end
    return count
end
--- UTF-8 aware version of string:sub.
-- Positions refer to characters as yielded by StringFunctions.split, not to
-- bytes. Negative positions count back from the end of the string.
-- @param str the string to take a substring of
-- @param i position of the first character to include (required)
-- @param j position of the last character to include; when omitted the
--          substring runs to the end of the string
-- @return the selected characters concatenated into one string
function StringFunctions.sub(str, i, j)
    -- Negative positions need the character count; compute it at most once.
    if i < 0 or (j and j < 0) then
        local total = StringFunctions.len(str)
        if i < 0 then
            i = total + i + 1
        end
        if j and j < 0 then
            j = total + j + 1
        end
    end
    -- Collect pieces in a table and join once, instead of growing a string
    -- with `..` on every iteration.
    local pos, parts = 0, {}
    for c in StringFunctions.split(str) do
        pos = pos + 1
        if j and pos > j then
            -- everything from here on lies past the requested range
            break
        end
        if pos >= i then
            parts[#parts + 1] = c
        end
    end
    return table.concat(parts)
end
return StringFunctions