Jump to content

Mòideal:script utilities/tracking

O Wiktionary

(deasbaireachd • deasaich • eachdraidh • ceanglaichean • doc • bogsa-gainmhich • cùisean deuchainn)


-- Table holding everything this module exposes; it is returned at the end of
-- the file.
local export = {}

-- Shorthand for building strings from Unicode code points; used for the
-- patterns in the tracking-conditions table.
local U = mw.ustring.char
--[[
--	redirect_to: tracking conditions to redirect to (allows multiple languages
	to use the same conditions)

--	path: added to Template:tracking; default is the language code (or the
	code under which the tracking conditions are listed)
	--	Template:tracking/Greek
	--	Template:tracking/ang

--	code: added after path
	--	Template:tracking/Greek/spacing-coronis
	--	Template:tracking/ang/acute

--	chars: what to search for in the text; can be a table containing multiple
	patterns

--	decompose: if true, the text is converted to NFD before it is searched
]]

-- Tracking conditions, keyed by language code or by a shared name (such as
-- "Greek") that other entries point to with redirect_to.
--
-- Each value mixes two kinds of content:
--   * an optional "path" field, used instead of the key when the tracking
--     name is built;
--   * an array part, walked in order, whose items are either
--     { redirect_to = <key> } (apply the conditions listed under <key>) or
--     { chars = ..., code = ..., decompose = ... } (a pattern, or table of
--     patterns, to look for, and the code to record when it is found).
export.allTrackingConditions = {
	["ang"] = {
		-- decompose = true: the text is searched in NFD form, so U+0301 is
		-- also found when it is part of a precomposed character.
		{ chars = U(0x301), code = "acute", decompose = true },
	},
	["grc"] = {
		-- Apply the shared "Greek" conditions as well as the ones below.
		{ redirect_to = "Greek" },
		
		path = "Ancient Greek",
		{ chars = U(0x1FBD),							code = "spacing-coronis" },
		{ chars = U(0x1FBF),							code = "spacing-smooth-breathing" },
		-- Either of the two characters above also counts as "wrong-apostrophe".
		{ chars = "[" .. U(0x1FBD) .. U(0x1FBF) .. "]", code = "wrong-apostrophe" },
	},
	["el"] = {
		-- No conditions of its own; only the shared "Greek" ones.
		{ redirect_to = "Greek" },
	},
	-- Shared by "grc" and "el" through redirect_to. There is no "path" here,
	-- so the key "Greek" itself is used as the path.
	["Greek"] = {
		{ chars = "ϑ", code = "wrong-theta" },
		{ chars = "ϰ", code = "wrong-kappa" },
		{ chars = "ϱ", code = "wrong-rho" },
		{ chars = "ϕ", code = "wrong-phi" },
	},
	["ru"] = {
		path = "Russian",
		-- Searched in NFD form, so U+0300 is found inside precomposed
		-- characters too.
		{ chars = U(0x300), code = "grave-accent", decompose = true },
	},
	["bo"] = {
		path = "Tibetan",
		-- Two patterns for one code: one of the two listed characters at the
		-- very end of the text, or directly before a "]]" that ends the text.
		{ chars = { "[་།]$", "[་།]%]%]$" }, code = "trailing-punctuation" },
	},
	["th"] = {
		path = "Thai",
		-- The same character twice in a row (written as a concatenation of
		-- two one-character literals).
		{ chars = "เ".."เ", code = "broken-ae" },
		-- First character, optionally one of the four bracketed marks, then
		-- the final character.
		{ chars = "ํ[่้๊๋]?า", code = "broken-am" },
		-- Either bracketed character followed by the final character.
		{ chars = "[ฤฦ]า", code = "wrong-rue-lue" },
	},
	["lo"] = {
		path = "Lao",
		-- Same shapes as the Thai "broken-ae" / "broken-am" patterns, with
		-- Lao characters.
		{ chars = "ເ".."ເ", code = "broken-ae" },
		{ chars = "ໍ[່້໊໋]?າ", code = "broken-am" },
		-- Plain two-character sequences; the codes mark them only as
		-- "possible" problems.
		{ chars = "ຫນ", code = "possible-broken-ho-no" },
		{ chars = "ຫມ", code = "possible-broken-ho-mo" },
		{ chars = "ຫລ", code = "possible-broken-ho-lo" },
	},
	["khb"] = {
		path = "Lü",
		-- The same character twice in a row.
		{ chars = "ᦵ".."ᦵ", code = "broken-ae" },
		-- Any character in the bracketed range followed by one of the four
		-- characters in the second set.
		{ chars = "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", code = "possible-wrong-sequence" },
	},
}

--[[
	Checks one tracking condition against the text and records a tracking hit
	if any of the condition's patterns is found.

	condition: one item from a tracking-conditions list. It must have "chars"
		(a pattern string, or a table of pattern strings) and "code";
		otherwise it is ignored. If "decompose" is set, the NFD form of the
		text is searched instead of the text itself.
	text: the string to search.
	path: middle part of the tracking name; a hit is recorded as
		"script/<path>/<code>" through Module:debug's track function.

	The condition table is only read, never modified, so the shared
	export.allTrackingConditions data stays exactly as it was declared.
]]
local function interpretCondition(condition, text, path)
	local patterns, code = condition.chars, condition.code
	
	-- Ignore conditions that don't have "chars" or "code".
	if not (patterns and code) then
		return nil
	end
	
	-- Wrap a lone pattern in a local table instead of writing the wrapper
	-- back into the condition.
	if type(patterns) ~= "table" then
		patterns = { patterns }
	end
	
	-- The form of the text to search is the same for every pattern, so work
	-- it out once rather than once per pattern.
	local haystack = condition.decompose and mw.ustring.toNFD(text) or text
	
	-- pairs (not ipairs) so that every entry of the table is considered,
	-- whatever its key; entries that are not strings are skipped.
	for _, pattern in pairs(patterns) do
		if type(pattern) == "string" and mw.ustring.find(haystack, pattern) then
			require("Module:debug").track("script/" .. path .. "/" .. code)
			-- All patterns of a condition report the same tracking name, so
			-- one hit is enough; further matches would only repeat it.
			return
		end
	end
end

--[[
	Runs every tracking condition listed under "code" in
	export.allTrackingConditions against the text.

	text: the string to check; if it is nil or false nothing is done.
	code: key into export.allTrackingConditions (a language code, or a name
		that another entry redirects to). Unknown keys are silently ignored.

	Items with "redirect_to" are followed recursively, using the target
	entry's own path. Every other item is handed to interpretCondition with
	the item's own "path" if it has one, otherwise the entry's "path",
	otherwise the key itself.
]]
local function interpretConditions(text, code)
	if not text then
		return nil
	end
	
	local conditionList = export.allTrackingConditions[code]
	if not conditionList then
		return
	end
	
	local defaultPath = conditionList.path or code
	
	for _, entry in ipairs(conditionList) do
		local redirectKey = entry.redirect_to
		if redirectKey then
			interpretConditions(text, redirectKey)
		else
			interpretCondition(entry, text, entry.path or defaultPath)
		end
	end
end

--[[
	Public entry point: checks the text against the tracking conditions
	registered for a language.

	text: the string to check (nothing is checked if it is nil).
	lang: an object with a getCode() method; the returned code selects the
		entry of export.allTrackingConditions to apply.
]]
function export.track(text, lang)
	interpretConditions(text, lang:getCode())
end

return export