Misplaced Pages

Module:Wikt-lang/sandbox: Difference between revisions

Article snapshot taken from Wikipedia with creative commons attribution-sharealike license. Give it a read and then ask your questions in the chat. We can research this topic together.
< Module:Wikt-lang Browse history interactively← Previous editNext edit →Content deleted Content added
Revision as of 15:50, 26 October 2020 editGonnym (talk | contribs)Autopatrolled, Extended confirmed users, Template editors222,872 editsNo edit summary← Previous edit Revision as of 12:43, 1 December 2020 edit undoGonnym (talk | contribs)Autopatrolled, Extended confirmed users, Template editors222,872 editsNo edit summaryNext edit →
Line 1: Line 1:
require('Module:No globals') require("Module:No globals")
local m_data = mw.loadData("Module:Language/data/sandbox") local m_data = mw.loadData("Module:Language/data/sandbox")
local langModule = require("Module:Lang/sandbox")
local langData = m_data.languages or m_data local langData = m_data.languages or m_data


Line 8: Line 9:
= "]", = "]",
= "]", = "]",
= "<i lang=\"%s\" xml:lang=\"%s\"%s>%s</i>",
= "<span lang=\"%s\" xml:lang=\"%s\"%s>%s</span>",
} }


local error_messages = { local errorMessages = {
= "No language code.", = "No language code.",
= "No Wiktionary entry.", = "No Wiktionary entry.",
Line 18: Line 17:
} }


local tracking_categories = { local trackingCategories = {
= "]", = "]",
= "]", = "]",
Line 29: Line 28:


local function get_error_message(message) local function get_error_message(message)
return string.format('<span style="font-size:100%%; font-style:normal;" class="error">Error: %s</span>', message) .. tracking_categories return string.format('<span style="font-size: 100%%; font-style: normal;" class="error">Error: %s</span>', message) .. trackingCategories
end end


local function getCodes(code, text) local function getCodes(code, text)
local redirect_code = m_data.redirects local redirectCode = m_data.redirects
if redirect_code then if redirectCode then
code = redirect_code code = redirectCode
table.insert(activeTrackingCategories, tracking_categories) table.insert(activeTrackingCategories, trackingCategories)
end end


local langModule = require('Module:Lang/sandbox').get_ietf_parts local _, _, _, _, private, errorText = langModule.get_ietf_parts(code)
local languageCode, script, region, variant, private, errorText = langModule(code)
-- TODO: "languageCode" not used as "code" is used instead. See if it's needed.


-- Temp code to handle custom private codes until added properly to Module:Lang. -- Temp code to handle custom private codes until added properly to Module:Lang.
if errorText and m_data.languages and m_data.languages.private then if errorText and m_data.languages and m_data.languages.private then
errorText = nil errorText = nil
end
if not script or script == "" then
script = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
end end


return code, script, errorText return code, errorText
end end


Line 93: Line 86:
end end


local function createWiktionaryLink(wiktionaryText, linkText, languageCode) local function createWiktionaryLink(wiktionaryText, displayText, languageCode)
local name_from_tag = require('Module:Lang/sandbox')._name_from_tag local nameFromTag = langModule._name_from_tag({languageCode})
local name_from_lang_module = name_from_tag({languageCode})
if languageCode then if languageCode then
local data = langData local data = langData
Line 101: Line 93:
if data and data.name then if data and data.name then
name = data.name name = data.name
elseif name_from_lang_module and not name_from_lang_module:find("error") then elseif nameFromTag and not nameFromTag:find("error") then
name = name_from_lang_module name = nameFromTag
else else
-- On other languages' wikis, use mw.getContentLanguage():getCode(), -- On other languages' wikis, use mw.getContentLanguage():getCode(),
Line 111: Line 103:


if name == "" then if name == "" then
return get_error_message(string.format(error_messages, languageCode)) return get_error_message(string.format(errorMessages, languageCode))
end end


Line 119: Line 111:
elseif data and data.type == "reconstructed" then elseif data and data.type == "reconstructed" then
-- Track reconstructed entries with no asterisk by transcluding -- Track reconstructed entries with no asterisk by transcluding
table.insert(activeTrackingCategories, tracking_categories) table.insert(activeTrackingCategories, trackingCategories)
wiktionaryText = string.format(strings, name, wiktionaryText) wiktionaryText = string.format(strings, name, wiktionaryText)


Line 126: Line 118:
end end


return string.format(strings, wiktionaryText, name, linkText) return string.format(strings, wiktionaryText, name, displayText)
else else
return string.format(strings, wiktionaryText, linkText) return string.format(strings, wiktionaryText, displayText)
end end
end


-- TODO: see if all or some of the logic can be handled by ]
local function tag(wiktionaryLink, languageCode, script, italics)
local data = langData

local textDirectionMarkers = {"", "", ""}
if data and data == "rtl" then
textDirectionMarkers = {' dir="rtl"', '&rlm;', '&lrm;'}
end

local out = {textDirectionMarkers}

-- Use Wikipedia code if it has been given: for instance,
-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
-- code "ine-x-proto".
languageCode = data and data.Wikipedia_code or languageCode
local italicize = script == "Latn" and italics
if italicize then
table.insert(out, string.format(strings, languageCode, languageCode, textDirectionMarkers, wiktionaryLink))
else
table.insert(out, string.format(strings, languageCode, languageCode, textDirectionMarkers, wiktionaryLink))
end

table.insert(out, textDirectionMarkers)
return table.concat(out)
end end


Line 171: Line 135:
local code = args and mw.text.trim(args) local code = args and mw.text.trim(args)
if not code then if not code then
return get_error_message(error_messages) return get_error_message(errorMessages)
end end


local wiktionaryText = args local wiktionaryText = args
if not wiktionaryText then if not wiktionaryText then
return get_error_message(error_messages) return get_error_message(errorMessages)
end end


local displayText = args local displayText = args
local languageCode, scriptCode, errorMessage = getCodes(code, displayText or wiktionaryText) local languageCode, errorMessage = getCodes(code, displayText or wiktionaryText)


if errorMessage then if errorMessage then
Line 191: Line 155:
local wiktionaryTextCleaned = cleanWiktionaryText(wiktionaryText, languageCode) local wiktionaryTextCleaned = cleanWiktionaryText(wiktionaryText, languageCode)


if not displayText then
local linkText
if displayText then displayText = wiktionaryText
linkText = displayText
else
linkText = wiktionaryText
end end


local wiktionaryLink = createWiktionaryLink(wiktionaryTextCleaned, linkText, languageCode) local wiktionaryLink = createWiktionaryLink(wiktionaryTextCleaned, displayText, languageCode)
if not args then if not args then
wiktionaryLink = tag(wiktionaryLink, languageCode, scriptCode, italics) local langArgs = {code = languageCode, text = wiktionaryLink, italic = italics}
wiktionaryLink = langModule._lang(langArgs)
end end

Revision as of 12:43, 1 December 2020

This is the module sandbox page for Module:Wikt-lang (diff).
See also the companion subpage for test cases (run).
Module documentation[view] [edit] [history] [purge]
Beta: This module is rated as beta, and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected.
Language templates
Language names (ISO 639)
Interwiki links
Foreign-language text
Other
This module depends on the following other modules:

This module is used by {{Wikt-lang}}. It is inspired by the templates {{m}} and {{l}} and their associated modules on Wiktionary. It has a Wiktionary link function that links to the correct section of the Wiktionary entry, and applies correct language formatting and italics. The language-tagging function does most of what {{Lang}} does, except that italics can't be customized and categories aren't added.

The module uses Module:Wikt-lang/data to retrieve the language name for a language code, and to perform the necessary entry-name replacements (for instance, removing macrons from Latin entry names). These are unfortunately not automatically retrieved from Wiktionary's data modules. For language codes that do not have a name value in this module, the language name is fetched with mw.language.fetchLanguageName. When mw.language.fetchLanguageName does not fetch the correct language name (or any language name at all), please add it to Module:Wikt-lang/data; similarly, when the correct entry name is not generated, please add the entry-name replacements to that module.

Examples

Invalid codes

Errors

Comparison of codes

Language code Wiktionary name English Wikipedia name
aaq Penobscot Eastern Abnaki
abe Abenaki Western Abnaki
ajp South Levantine Arabic South Levantine Arabic
apc North Levantine Arabic Levantine Arabic
arb Modern Standard Arabic Standard Arabic
cel-x-bryproto Proto-Brythonic Error: unrecognized private tag: bryproto
cu Old Church Slavonic Church Slavonic
egy Egyptian Ancient Egyptian
frp Franco-Provençal Arpitan
gmw-x-proto Proto-West Germanic Error: unrecognized private tag: proto
grk-x-proto Proto-Hellenic Proto-Greek
ine-x-bsproto Proto-Balto-Slavic Error: unrecognized private tag: bsproto
moe Cree Innu
mul Translingual multiple
nds-de German Low German Low German
non-x-proto Proto-Norse Error: unrecognized private tag: proto
poz-x-polproto Proto-Nuclear Polynesian Error: unrecognized private tag: polproto
rw Rwanda-Rundi Kinyarwanda
tts Isan Northeastern Thai
xlu Luwian Cuneiform Luwian
zle-x-ort Old Ruthenian Error: unrecognized private tag: ort

Tracking categories

The above documentation is transcluded from Module:Wikt-lang/doc. (edit | history)
Editors can experiment in this module's sandbox (edit | diff) and testcases (edit | run) pages.
Add categories to the /doc subpage. Subpages of this module.
require("Module:No globals")
local m_data = mw.loadData("Module:Language/data/sandbox")
local langModule = require("Module:Lang/sandbox")
local langData = m_data.languages or m_data

-- Format templates passed to string.format when building Wiktionary page
-- titles ("Reconstruction:<name>/<entry>", "Appendix:<name>/<entry>") and links.
-- NOTE(review): every entry below is missing its key (presumably of the form
-- ["NAME"] = ...), and the last two values have lost their wikilink text. This
-- looks like damage from page extraction; as written the table constructor is
-- not valid Lua. TODO: restore keys and link templates from the module history.
local strings = {
	 = "Reconstruction:%s/%s",
	 = "Appendix:%s/%s",
	 = "]",
	 = "]",
}

-- Human-readable error texts handed to get_error_message().
-- The third entry is a string.format template taking the offending language code.
-- NOTE(review): the keys of all three entries are missing (apparently stripped
-- during page extraction), so this constructor is not valid Lua as written.
-- TODO: restore the keys from the module history.
local errorMessages = {
	 = "No language code.",
	 = "No Wiktionary entry.",
	 = "The language name for the language code <code>%s</code> was not found.",
}

-- Tracking-category wikitext that is appended to error output or queued in
-- activeTrackingCategories.
-- NOTE(review): both the keys and the category link text of every entry are
-- missing (apparently stripped during page extraction); only the closing "]"
-- of each link survives. TODO: restore from the module history.
local trackingCategories = {
	 = "]",
	 = "]",
	 = "]",
}

-- Tracking categories queued (via table.insert) while a call is processed;
-- p.wiktlang concatenates them onto its result in its non-testing branch.
local activeTrackingCategories = {}

-- Table of exported functions; returned at the end of the module.
local p = {}

-- Wraps `message` in an error-styled <span> and appends tracking-category text.
-- The "%%" in the template is a literal percent sign for string.format.
-- NOTE(review): `trackingCategories` is concatenated as a whole table, which
-- would raise "attempt to concatenate a table value". The key selecting a
-- single category string appears to have been lost in extraction - TODO restore.
local function get_error_message(message)
	return string.format('<span style="font-size: 100%%; font-style: normal;" class="error">Error: %s</span>', message) .. trackingCategories
end

-- Resolves the language code to use and validates it through Module:Lang.
--
-- Parameters:
--   code - language code supplied by the caller
--   text - not used by the visible body
-- Returns: the (possibly redirected) code, and an error text or nil.
--
-- NOTE(review): several index expressions appear to have been lost in
-- extraction. As written, `m_data.redirects` is the whole redirect table rather
-- than the entry for `code`, the whole `trackingCategories` table is queued, and
-- `m_data.languages.private` is not looked up per language. Presumably each of
-- these was indexed (e.g. by `code`) - TODO confirm against the module history.
local function getCodes(code, text)
	local redirectCode = m_data.redirects
	if redirectCode then
		code = redirectCode
		table.insert(activeTrackingCategories, trackingCategories)
	end

	-- Only the sixth result (error text) is acted on; `private` is captured but unused below.
	local _, _, _, _, private, errorText = langModule.get_ietf_parts(code)

	-- Temp code to handle custom private codes until added properly to Module:Lang.
	if errorText and m_data.languages and m_data.languages.private then
		-- Suppress Module:Lang's error when the data module marks the code as private.
		errorText = nil
	end

	return code, errorText
end	

-- Converts entry text into the Wiktionary page title to link to.
--
-- Wiki bold/italic quote markup is stripped first; then, if the language has
-- entry-name replacements defined in langData, they are applied.
--
-- Parameters:
--   wiktionaryText - entry text; converted with tostring()
--   languageCode   - key used to look the language up in langData
-- Returns: the cleaned entry name (string).
--
-- NOTE(review): the original text of this function had lost its index
-- expressions (`langData`, `data and data`, `replacements.to` were all used
-- un-indexed, leaving `languageCode` and the loop index unused). The lookups
-- below restore them; the field name `replacements` is inferred from the
-- variable name and the module documentation - confirm against the data module.
local function cleanWiktionaryText(wiktionaryText, languageCode)
	wiktionaryText = tostring(wiktionaryText)

	-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
	-- The parentheses drop gsub's second result (the substitution count).
	wiktionaryText = (wiktionaryText:gsub("'''", ""))
	wiktionaryText = (wiktionaryText:gsub("''", ""))

	-- If the language is not found, or it has no replacements, return the text as is.
	local data = languageCode and langData[languageCode]
	local replacements = data and data.replacements
	if not replacements then
		return wiktionaryText
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		wiktionaryText = mw.ustring.toNFD(wiktionaryText)
		local to = replacements.to
		for i, from in ipairs(replacements.from) do
			-- Pair each pattern with the replacement at the same position;
			-- a missing replacement means "delete the match".
			wiktionaryText = ugsub(wiktionaryText, from, to and to[i] or "")
		end
	else
		-- Plain map of pattern -> replacement.
		for regex, replacement in pairs(replacements) do
			wiktionaryText = ugsub(wiktionaryText, regex, replacement)
		end
	end

	return wiktionaryText
end

-- Builds the link to a Wiktionary entry.
--
-- Parameters:
--   wiktionaryText - entry name (a leading "*" marks a reconstructed entry)
--   displayText    - text to show for the link
--   languageCode   - language code, or nil for a link without a language section
-- Returns: the formatted link, or an error <span> from get_error_message()
-- when no language name can be found.
--
-- NOTE(review): index expressions appear to have been lost in extraction.
-- As written, `langData` is not indexed by `languageCode`, and `strings`,
-- `errorMessages` and `trackingCategories` are passed as whole tables where a
-- single string entry is required (string.format would raise on a table
-- format argument). TODO: restore the keys from the module history.
local function createWiktionaryLink(wiktionaryText, displayText, languageCode)
	-- Looked up before the nil check below, so this also runs when languageCode is nil.
	local nameFromTag = langModule._name_from_tag({languageCode})
	if languageCode then
		local data = langData
		local name
		-- Name resolution order: data module, then Module:Lang, then MediaWiki's own language names.
		if data and data.name then
			name = data.name
		elseif nameFromTag and not nameFromTag:find("error") then
			name = nameFromTag
		else
			-- On other languages' wikis, use mw.getContentLanguage():getCode(),
			-- or replace 'en' with that wiki's language code.
			-- name = mw.language.fetchLanguageName(languageCode, mw.getContentLanguage():getCode())
			name = mw.language.fetchLanguageName(languageCode, 'en')
		end

		if name == "" then
			return get_error_message(string.format(errorMessages, languageCode))
		end

		if wiktionaryText:sub(1, 1) == "*" then
			-- Drop the leading asterisk before building the page title.
			wiktionaryText = string.format(strings, name, wiktionaryText:sub(2))

		elseif data and data.type == "reconstructed" then
			-- Track reconstructed entries with no asterisk by transcluding
			table.insert(activeTrackingCategories, trackingCategories)
			wiktionaryText = string.format(strings, name, wiktionaryText)

		elseif data and data.type == "appendix" then
			wiktionaryText = string.format(strings, name, wiktionaryText)
		end

		-- Link with a language-name argument in addition to target and display text.
		return string.format(strings, wiktionaryText, name, displayText)
	else
		-- No language code: link built from target and display text only.
		return string.format(strings, wiktionaryText, displayText)
	end
end

-- Exported entry point that delegates to p.wiktlang.
-- NOTE(review): `frame = true` replaces the frame object itself with a boolean
-- before it is passed on. Presumably this originally set a flag field on
-- `frame` whose index was lost in extraction - TODO confirm and restore.
function p.wikt(frame)
	frame = true
	return p.wiktlang(frame)
end

-- Exported entry point: validates the arguments, builds the Wiktionary link
-- and optionally wraps it with Module:Lang's language tagging.
--
-- Returns the link wikitext (with queued tracking categories appended in the
-- non-testing branch), or an error <span> when the code or entry is missing
-- or the code is rejected by getCodes().
--
-- NOTE(review): every `args` lookup below appears to have lost its index in
-- extraction (`args` is used directly as the code, the entry text, the display
-- text and both flags), and `errorMessages` is passed as a whole table.
-- TODO: restore the argument names/positions from the module history.
function p.wiktlang(frame)
	local getArgs = require('Module:Arguments').getArgs
	local args = getArgs(frame)

	local code = args and mw.text.trim(args)
	if not code then
		return get_error_message(errorMessages)
	end

	local wiktionaryText = args
	if not wiktionaryText then
		return get_error_message(errorMessages)
	end

	local displayText = args
	local languageCode, errorMessage = getCodes(code, displayText or wiktionaryText)

	if errorMessage then
		return get_error_message(errorMessage)
	end

	-- Italics are on unless the parameter is explicitly "n" or "-".
	local italics = args.italics or args.i
	italics = not (italics == "n" or italics == "-")

	local wiktionaryTextCleaned = cleanWiktionaryText(wiktionaryText, languageCode)

	-- Without separate display text, show the entry text as given (not the cleaned form).
	if not displayText then
		displayText = wiktionaryText
	end

	local wiktionaryLink = createWiktionaryLink(wiktionaryTextCleaned, displayText, languageCode)
	if not args then
		-- Let Module:Lang apply the language markup around the finished link.
		local langArgs = {code = languageCode, text = wiktionaryLink, italic = italics}
		wiktionaryLink = langModule._lang(langArgs)
	end
	
	-- Used for testing
	if args then
		return wiktionaryLink
	else
		return wiktionaryLink .. table.concat(activeTrackingCategories)
	end
end

return p
Category: