Module:Titlelib: Difference between revisions

From Wikimedia Foundation Governance Wiki
Content deleted Content added
Pols12 (talk | contribs)
handle interlang prefixes so that they always come after interwiki prefixes (and each kind appears at most once)
m 6 revisions imported from meta:Module:Titlelib: import
 
(3 intermediate revisions by 2 users not shown)
Line 14: Line 14:
if not onlyInterlang then onlyInterlang = false end
if not onlyInterlang then onlyInterlang = false end
str = mw.ustring.lower(str) --prefixes are stored as lower case
str = mw.ustring.lower(str) --prefixes are stored as lower case
for prefix, details in pairs(mw.site.interwikiMap()) do
if mw.site.interwikiMap()[str] then -- `str` is in interwikiMap
if str == prefix then
-- When requested, ensures `str` is a valid language code
return not onlyInterlang or mw.language.isKnownLanguageTag(str)
return not onlyInterlang or mw.language.isKnownLanguageTag(str)
end
end
end
return false
return false -- `str` is not in interwikiMap
end
end


Line 95: Line 95:
return outwikiPart .. ':', inwikiPart
return outwikiPart .. ':', inwikiPart
end

--[[ Gives the interwiki prefix for a given language code or database project id.
E.g. langToWiki("de-formal") will return 'de'.

Currently, doesn’t provide any fallback.
@param lang String Language code or database project identifier (project
code on Wikidata).
@Return String|bool Related interwiki prefix (without colon), or false if
there is none.
@Seealso T253387
]]
function p.langToWiki(lang)
lang = lang:gsub('_', '-') --DB-prefixes use `_` instead of `-`
local exceptions = {
-- ['lang-code'] = 'interwiki-prefix'
['ady-cyrl'] = 'ady',
['aeb'] = 'ar',
['aeb-arab'] = 'ar',
['aeb-latn'] = 'ar',
['arq'] = 'ar',
['ban-bali'] = 'ban',
['be-x-old'] = 'be-tarask', -- Both work
['bho'] = 'bh',
['crh-latn'] = 'crh',
['crh-cyrl'] = 'crh',
['de-at'] = 'de',
['de-ch'] = 'de',
['de-formal'] = 'de',
['egl'] = 'eml',
['en-ca'] = 'en',
['en-gb'] = 'en',
['es-419'] = 'es',
['es-formal'] = 'es',
['frc'] = 'fr',
['gan-hans'] = 'gan',
['gan-hant'] = 'gan',
['gom-deva'] = 'gom',
['gom-latn'] = 'gom',
['gsw'] = 'als',
['hif-latn'] = 'hif',
['hu-formal'] = 'hu',
['hsn'] = 'zh',
['ike'] = 'iu',
['ike-cans'] = 'iu',
['ike-latn'] = 'iu',
['kbd-cyrl'] = 'kbd',
['kk-arab'] = 'kk',
['kk-cn'] = 'kk',
['kk-cyrl'] = 'kk',
['kk-kz'] = 'kk',
['kk-latn'] = 'kk',
['kk-tr'] = 'kk',
['ko-kp'] = 'ko',
['ks-arab'] = 'ks',
['ks-deva'] = 'ks',
['ku-arab'] = 'ku',
['ks-latn'] = 'ku',
['lzh'] = 'zh-classical', -- Both work
['ms-arab'] = 'ms',
['nan'] = 'zh-min-nan', -- Both work
['nb'] = 'no', -- Both work
['nl-informal'] = 'nl',
['mo'] = 'ro', -- Both work
['pt-br'] = 'pt',
['rgn'] = 'eml',
['rup'] = 'roa-rup',
['sgs'] = 'bat-smg',
['shi-latn'] = 'shi',
['shi-tfng'] = 'shi',
['shy-latn'] = 'shy', -- Wikipedia in Incubator
['skr-arab'] = 'skr',
['se-fi'] = 'se',
['se-no'] = 'se',
['se-se'] = 'se',
['sr-ec'] = 'sr',
['sr-el'] = 'sr',
['tg-cyrl'] = 'tg',
['tg-latn'] = 'tg',
['tt-cyrl'] = 'tt',
['tt-latn'] = 'tt',
['ug-arab'] = 'ug',
['ug-latn'] = 'ug',
['uz-cyrl'] = 'uz',
['uz-latn'] = 'uz',
['vro'] = 'fiu-vro',
['yue'] = 'zh-yue', -- Both work
['zh-cn'] = 'zh',
['zh-hans'] = 'zh',
['zh-hant'] = 'zh',
['zh-hk'] = 'zh',
['zh-mo'] = 'zh',
['zh-my'] = 'zh',
['zh-sg'] = 'zh',
['zh-tw'] = 'zh', -- Both work
}
if exceptions[lang] then
return exceptions[lang]
elseif p.isInterlangPrefix(lang) then
return lang
else --given string is not a valid interlang prefix
return false
-- We may consider returning first fallback language code, if it is a
-- valid non-English interwiki.
end
end
end



Latest revision as of 23:25, 22 January 2024

Documentation for this module may be created at Module:Titlelib/doc

-- == Titlelib.lua == --
local p = {}

--[[ Tells whether the given string is a registered interwiki prefix.

	The string is lower-cased and looked up in mw.site.interwikiMap().

	@param str String Candidate prefix, without colon.
	@param onlyInterlang bool When truthy, the prefix must additionally be
	accepted by mw.language.isKnownLanguageTag(). Treated as false when
	omitted.

	@return true if str is a registered interwiki prefix (and, when
	requested, a known language tag), false otherwise.
]]
function p.isInterwiki(str, onlyInterlang)
	local prefix = mw.ustring.lower(str) -- the map is keyed by lower-case prefixes

	if not mw.site.interwikiMap()[prefix] then
		return false -- not a registered interwiki prefix
	end

	if onlyInterlang then
		-- Caller only wants prefixes that are also language codes.
		return mw.language.isKnownLanguageTag(prefix)
	end

	return true
end

--[[ Tells whether the given string is a registered interlanguage prefix.

	Delegates to p.isInterwiki() with its language-only flag set, i.e. the
	prefix must be in mw.site.interwikiMap() and be a known language tag.

	@param str String Candidate prefix, without colon.

	@return true if str is a registered interlang prefix, false otherwise.
]]
function p.isInterlangPrefix(str)
	local interlangOnly = true
	return p.isInterwiki(str, interlangOnly)
end

--[[ Checks whether the given string is a valid namespace prefix on this wiki.

	I.e. whether it equals, case-insensitively, the local name, the canonical
	name or any alias of a namespace listed in mw.site.namespaces.

	@param str String Candidate namespace name, without colon.

	@return true if str is a registered namespace name or alias, false
	otherwise.
]]
function p.isNamespace(str)
	local wanted = mw.ustring.lower(str)

	for _, ns in pairs(mw.site.namespaces) do
		if wanted == mw.ustring.lower(ns.name)
			or wanted == mw.ustring.lower(ns.canonicalName)
		then
			return true
		end

		-- Check every alias rather than only the first one: a namespace
		-- may have several aliases on wikis other than Meta.
		for _, alias in ipairs(ns.aliases) do
			if wanted == mw.ustring.lower(alias) then
				return true
			end
		end
	end

	return false
end

--[[ Splits a link target into its interwiki prefixes and the page title.

	The string is cut on colons and each non-empty segment is examined from
	left to right:
		* a namespace name ends the prefix section and starts the title;
		* an interlang prefix is kept as a prefix and ends the prefix section;
		* the first other interwiki prefix is kept as a prefix;
		* anything else ends the prefix section and starts the title.
	Once the prefix section has ended, every remaining segment is appended to
	the title. Empty segments (leading or doubled colons) are skipped.

	@param linkStr String Link target, optionally prefixed.

	@return Two strings (two return values, not a table):
		* the recognized interwiki prefixes, each preceded by a colon and
		followed by a final colon (':' alone when there is none);
		* the rest of the given string. It begins with a colon when the title
		section was opened by a namespace name or follows an interlang
		prefix.
]]
function p.splitPrefixedTitle(linkStr)
	if mw.ustring.find(linkStr, ':') == nil then
		-- No colon at all: the whole string is the page title.
		return ':', linkStr
	end

	local prefixes = '' -- interwiki prefixes and language code
	local title = '' -- page name, including namespace prefix
	local prefixesClosed = false -- true once no further prefix is accepted
	local seenInterwiki = false -- true once a non-language interwiki was taken

	for _, segment in ipairs(mw.text.split(linkStr, ':', true)) do
		if segment ~= '' then
			if prefixesClosed or p.isNamespace(segment) then
				prefixesClosed = true
				title = title .. ':' .. segment
			elseif p.isInterlangPrefix(segment) then
				-- A language prefix is always the last prefix.
				prefixesClosed = true
				prefixes = prefixes .. ':' .. segment
			elseif not seenInterwiki and p.isInterwiki(segment) then
				seenInterwiki = true
				prefixes = prefixes .. ':' .. segment
			else
				-- First segment that is neither prefix nor namespace.
				prefixesClosed = true
				title = segment
			end
		end
	end

	return prefixes .. ':', title
end

-- Language codes (or database project ids, with `_` already replaced by `-`)
-- whose interwiki prefix differs from the code itself.
-- Built once at module load instead of on every p.langToWiki() call.
local langExceptions = {
--		['lang-code']	= 'interwiki-prefix'

		['ady-cyrl']	= 'ady',
		['aeb']			= 'ar',
		['aeb-arab']	= 'ar',
		['aeb-latn']	= 'ar',
		['arq']			= 'ar',
		['ban-bali']	= 'ban',
		['be-x-old']	= 'be-tarask',		-- Both work
		['bho']			= 'bh',
		['crh-latn']	= 'crh',
		['crh-cyrl']	= 'crh',
		['de-at']		= 'de',
		['de-ch']		= 'de',
		['de-formal']	= 'de',
		['egl']			= 'eml',
		['en-ca']		= 'en',
		['en-gb']		= 'en',
		['es-419']		= 'es',
		['es-formal']	= 'es',
		['frc']			= 'fr',
		['gan-hans']	= 'gan',
		['gan-hant']	= 'gan',
		['gom-deva']	= 'gom',
		['gom-latn']	= 'gom',
		['gsw']			= 'als',
		['hif-latn']	= 'hif',
		['hu-formal']	= 'hu',
		['hsn']			= 'zh',
		['ike']			= 'iu',
		['ike-cans']	= 'iu',
		['ike-latn']	= 'iu',
		['kbd-cyrl']	= 'kbd',
		['kk-arab']		= 'kk',
		['kk-cn']		= 'kk',
		['kk-cyrl']		= 'kk',
		['kk-kz']		= 'kk',
		['kk-latn']		= 'kk',
		['kk-tr']		= 'kk',
		['ko-kp']		= 'ko',
		['ks-arab']		= 'ks',
		['ks-deva']		= 'ks',
		['ku-arab']		= 'ku',
		['ku-latn']		= 'ku',				-- was mistyped as 'ks-latn'
		['lzh']			= 'zh-classical',	-- Both work
		['ms-arab']		= 'ms',
		['nan']			= 'zh-min-nan',		-- Both work
		['nb']			= 'no',				-- Both work
		['nl-informal']	= 'nl',
		['mo']			= 'ro',				-- Both work
		['pt-br']		= 'pt',
		['rgn']			= 'eml',
		['rup']			= 'roa-rup',
		['sgs']			= 'bat-smg',
		['shi-latn']	= 'shi',
		['shi-tfng']	= 'shi',
		['shy-latn']	= 'shy',			-- Wikipedia in Incubator
		['skr-arab']	= 'skr',
		['se-fi']		= 'se',
		['se-no']		= 'se',
		['se-se']		= 'se',
		['sr-ec']		= 'sr',
		['sr-el']		= 'sr',
		['tg-cyrl']		= 'tg',
		['tg-latn']		= 'tg',
		['tt-cyrl']		= 'tt',
		['tt-latn']		= 'tt',
		['ug-arab']		= 'ug',
		['ug-latn']		= 'ug',
		['uz-cyrl']		= 'uz',
		['uz-latn']		= 'uz',
		['vro']			= 'fiu-vro',
		['yue']			= 'zh-yue',			-- Both work
		['zh-cn']		= 'zh',
		['zh-hans']		= 'zh',
		['zh-hant']		= 'zh',
		['zh-hk']		= 'zh',
		['zh-mo']		= 'zh',
		['zh-my']		= 'zh',
		['zh-sg']		= 'zh',
		['zh-tw']		= 'zh',				-- Both work
}

--[[ Gives the interwiki prefix for a given language code or database project id.
	E.g. langToWiki("de-formal") will return 'de'.

	Underscores in the input are first replaced by hyphens. The result is
	then looked up in the exception table; if it is not listed there, it is
	returned unchanged provided p.isInterlangPrefix() accepts it.

	Currently, doesn’t provide any fallback.

	@param lang String Language code or database project identifier (project
	code on Wikidata).

	@Return String|bool Related interwiki prefix (without colon), or false if
	there is none.

	@Seealso T253387
]]
function p.langToWiki(lang)
	lang = lang:gsub('_', '-') --DB-prefixes use `_` instead of `-`

	local mapped = langExceptions[lang]
	if mapped then
		return mapped
	elseif p.isInterlangPrefix(lang) then
		return lang
	else --given string is not a valid interlang prefix
		return false
		-- We may consider returning first fallback language code, if it is a
		-- valid non-English interwiki.
	end
end

--[[ Builds a Special:MyLanguage link for the given page.

	The link is split with p.splitPrefixedTitle(); 'Special:MyLanguage/' is
	then inserted between the interwiki prefixes and the page title.

	@param pageLink String Page name, optionally prefixed with
	interwiki prefixes and namespace.

	@return String Name of the page which links to the given page link
	in the user language.
]]
function p.myLangLink(pageLink)
	local interwikiPrefixes, localTitle = p.splitPrefixedTitle(pageLink)
	return table.concat({ interwikiPrefixes, 'Special:MyLanguage/', localTitle })
end

return p