Module:DecodeEncode: Difference between revisions

From The Goon Show Depository

en>Matthiaspaul
(add workaround for &thinsp;)
 
No edit summary
 
(7 intermediate revisions by 4 users not shown)
Line 1: Line 1:
--require('strict')
local p = {}
local p = {}


function _getBoolean( boolean_str )
local function _getBoolean( boolean_str )
-- from: module:String; adapted
-- from: module:String; adapted
-- requires an explicit true
-- requires an explicit true
Line 22: Line 23:


function p.decode( frame )
function p.decode( frame )
local s
local s = frame.args['s'] or ''
local subset_only
local subset_only = _getBoolean(frame.args['subset_only'] or false)
 
s = frame.args['s'] or ''
subset_only = _getBoolean(frame.args['subset_only'] or false)


return p._decode( s, subset_only )
return p._decode( s, subset_only )
Line 32: Line 30:


function p._decode( s, subset_only )
function p._decode( s, subset_only )
local ret = nil;
-- U+2009 THIN SPACE: workaround for bug: HTML entity   is decoded incorrect. Entity   gets decoded properly
s = mw.ustring.gsub( s, ' ', ' ' )
-- U+03B5 ε GREEK SMALL LETTER EPSILON: workaround for bug (phab:T328840): HTML entity ε is decoded incorrect for gsub(). Entity ε gets decoded properly
s = mw.ustring.gsub( s, 'ε', 'ε' )


    s = mw.ustring.gsub( s, ' ', ' ' ) -- Workaround for bug:   gets properly decoded in decode, but   doesn't.
local ret = mw.text.decode( s, not subset_only )
 
ret = mw.text.decode( s, not subset_only )


return ret
return ret
Line 42: Line 41:


function p.encode( frame )
function p.encode( frame )
local s
local s = frame.args['s'] or ''
local charset
local charset = frame.args['charset']
 
s = frame.args['s'] or ''
charset = frame.args['charset']


return p._encode( s, charset )
return p._encode( s, charset )
Line 55: Line 51:
local ret
local ret


if charset ~= (nil or '') then
if charset and charset ~= '' then
ret = mw.text.encode( s, charset )
ret = mw.text.encode( s, charset )
else
else

Latest revision as of 10:44, 14 July 2024

--require('strict')
-- Export table: the functions attached to p below are what this module returns.
local p = {}

local function _getBoolean( boolean_str )
	-- Adapted from Module:String.
	-- Coerces a flag value to a real Lua boolean. Only an explicit
	-- affirmative string ('true', 'yes' or '1', compared case-insensitively)
	-- or the boolean true itself yields true; every other value, including
	-- nil and non-string/non-boolean types, yields false.
	local kind = type( boolean_str )

	if kind == 'boolean' then
		-- Already a boolean: pass it through unchanged.
		return boolean_str
	end

	if kind ~= 'string' then
		return false
	end

	local lowered = boolean_str:lower()
	return lowered == 'true' or lowered == 'yes' or lowered == '1'
end

function p.decode( frame )
	-- Entry point taking a frame: reads the 's' and 'subset_only' entries
	-- of frame.args and forwards them to p._decode.
	local args = frame.args
	local text = args['s'] or ''
	-- A missing 'subset_only' argument is handed to _getBoolean as false.
	local subset_flag = _getBoolean( args['subset_only'] or false )

	return p._decode( text, subset_flag )
end

function p._decode( s, subset_only )
	-- Decodes HTML entities in s with mw.text.decode and returns the result.
	--   s           : string to decode
	--   subset_only : when truthy, mw.text.decode is called with false as its
	--                 second argument; otherwise with true (the flag is
	--                 inverted before being passed on)
	--
	-- The two substitutions below rewrite named entities into their numeric
	-- form before decoding. The search strings must be the literal entity
	-- text ('&thinsp;', '&epsilon;'), not the already-decoded characters,
	-- otherwise each replacement is a no-op.

	-- U+2009 THIN SPACE: workaround for bug: HTML entity &thinsp; is decoded incorrectly. Entity &#8201; gets decoded properly
	s = mw.ustring.gsub( s, '&thinsp;', '&#8201;' )
	-- U+03B5 GREEK SMALL LETTER EPSILON: workaround for bug (phab:T328840): HTML entity &epsilon; is decoded incorrectly for gsub(). Entity &#949; gets decoded properly
	s = mw.ustring.gsub( s, '&epsilon;', '&#949;' )

	local ret = mw.text.decode( s, not subset_only )

	return ret
end

function p.encode( frame )
	-- Entry point taking a frame: reads the 's' and 'charset' entries of
	-- frame.args and forwards them to p._encode. A missing 's' becomes the
	-- empty string; 'charset' is passed through as-is (possibly nil).
	local args = frame.args
	local text = args['s'] or ''
	local chars = args['charset']

	return p._encode( text, chars )
end

function p._encode( s, charset )
	-- Encodes s with mw.text.encode and returns the result.
	-- example: charset = '_&©−°\\\"\'\=' -- do escape with backslash not %;
	local encoded

	if not charset or charset == '' then
		-- No usable charset given: call mw.text.encode without one so that
		-- it applies its own default set (per the original note: < > & " '
		-- and the non-breaking space).
		encoded = mw.text.encode( s )
	else
		encoded = mw.text.encode( s, charset )
	end

	return encoded
end

-- Return the export table p as the value of this module.
return p