Misplaced Pages

Module:Wikt-lang/sandbox: Difference between revisions

Article snapshot taken from Wikipedia with creative commons attribution-sharealike license. Give it a read and then ask your questions in the chat. We can research this topic together.
< Module:Wikt-lang Browse history interactively← Previous editNext edit →Content deleted Content added
Revision as of 17:42, 17 July 2024 editAsukite (talk | contribs)Extended confirmed users, Page movers, New page reviewers, Pending changes reviewers, Rollbackers35,239 editsm Asukite moved page Module:Language/sandbox to Module:Wikt-lang/sandbox: Moved per Special:Permalink/1235089085#Requested move 9 July 2024 using Move+← Previous edit Revision as of 12:19, 19 July 2024 edit undoGonnym (talk | contribs)Autopatrolled, Extended confirmed users, Template editors222,872 edits sync with liveNext edit →
Line 1: Line 1:
require('strict') require('strict')
local m_data = mw.loadData("Module:Language/data") local m_data = mw.loadData("Module:Wikt-lang/data")
local langData = m_data.languages or m_data local langData = m_data.languages or m_data


Line 140: Line 140:
end end


--local function tag(text, languageCode, script, italics) local function tag(text, languageCode, script, italics)
local function tag(text, languageCode, script, italicize)
local data = langData local data = langData
-- Use Misplaced Pages code if it has been given: for instance, -- Use Misplaced Pages code if it has been given: for instance,
Line 148: Line 147:
languageCode = data and data.Wikipedia_code or languageCode languageCode = data and data.Wikipedia_code or languageCode
-- local italicize = script == "Latn" and italics local italicize = script == "Latn" and italics
if not text then text = "" end if not text then text = "" end
Line 168: Line 167:
end end



--[[-------------------------< I T A L I C S _ S E T >--------------------------------------------------------

common function to determine whether <text> ({{{2}}}) renders in italic or upright font face.

this function created to deal with the complaint that English words should not be italicized except when used
for 'words-as-words' (MOS:FOREIGN and MOS:WORDSASWORDS). new with this function is support for |italics=yes to
override the upright rendering of English text.

returns boolean:
true: output rendered in italic font face
false: output rendered in upright font face

]]

-- Decide whether <text> renders in italics. An explicit |italics= value wins
-- in either direction; otherwise only Latn-script, non-English text is
-- italicized.
--   languageCode: language code string (compared against 'en')
--   scriptCode:   script code string (compared against 'Latn')
--   italics:      raw |italics= value; 'yes'/'y'/'+' force italics,
--                 'no'/'n'/'-' suppress them, anything else means "default"
-- Returns a boolean: true for italic, false for upright.
local function italics_set (languageCode, scriptCode, italics)
	-- a forcing value overrides every other consideration
	if italics == 'yes' or italics == 'y' or italics == '+' then
		return true
	end
	-- an explicit opt-out
	if italics == 'no' or italics == 'n' or italics == '-' then
		return false
	end
	-- default: italic only for Latn script that is not English
	return scriptCode == 'Latn' and languageCode ~= 'en'
end




Line 210: Line 179:
local italics = args.italics or args.i or args.italic local italics = args.italics or args.i or args.italic
-- italics = not (italics == "n" or italics == "-" or italics == "no") italics = not (italics == "n" or italics == "-" or italics == "no")
italics = italics_set (languageCode, scriptCode, italics)
return tag(text, languageCode, scriptCode, italics) .. errorText return tag(text, languageCode, scriptCode, italics) .. errorText
Line 230: Line 197:
error("Name for the language code " .. ("%q"):format(languageCode or nil) error("Name for the language code " .. ("%q"):format(languageCode or nil)
.. " could not be retrieved with mw.language.fetchLanguageName, " .. " could not be retrieved with mw.language.fetchLanguageName, "
.. "so it should be added to ]") .. "so it should be added to ]")
end end
end end
Line 285: Line 252:
local italics = args.italics or args.i or args.italic local italics = args.italics or args.i or args.italic
-- italics = not (italics == "n" or italics == "-" or italics == "no") italics = not (italics == "n" or italics == "-" or italics == "no")

italics = italics_set (languageCode, scriptCode, italics);

local entry, linkText local entry, linkText
if word2 and word1 then if word2 and word1 then

Revision as of 12:19, 19 July 2024

This is the module sandbox page for Module:Wikt-lang (diff).
See also the companion subpage for test cases (run).
Module documentation[view] [edit] [history] [purge]
Beta: This module is rated as beta, and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected.
Language templates
Language names (ISO 639)
Interwiki links
Foreign-language text
Other
This module depends on the following other modules:

This module is used by {{Wikt-lang}}. It is inspired by the templates {{m}} and {{l}} and their associated modules on Wiktionary. It has a Wiktionary link function that links to the correct section of the Wiktionary entry, and applies correct language formatting and italics. The language-tagging function does most of what {{Lang}} does, except that italics can't be customized and categories aren't added.

The module uses Module:Wikt-lang/data to retrieve the language name for a language code, and to perform the necessary entry-name replacements (for instance, removing macrons from Latin entry names). These are unfortunately not automatically retrieved from Wiktionary's Wikt-lang data modules. For language codes that do not have a name value in this module, the language name is fetched with mw.language.fetchLanguageName. When mw.language.fetchLanguageName does not return the correct language name (or any language name at all), please add it to Module:Wikt-lang/data; similarly, when the correct entry name is not generated, please add the entry-name replacements to that data module.

Examples

Invalid codes

Errors

Comparison of codes

Language code Wiktionary name English Wikipedia name
aaq Penobscot Eastern Abnaki
abe Abenaki Western Abnaki
ajp South Levantine Arabic South Levantine Arabic
apc North Levantine Arabic Levantine Arabic
arb Modern Standard Arabic Standard Arabic
cel-x-bryproto Proto-Brythonic Error: unrecognized private tag: bryproto
cu Old Church Slavonic Church Slavonic
egy Egyptian Ancient Egyptian
frp Franco-Provençal Arpitan
gmw-x-proto Proto-West Germanic Error: unrecognized private tag: proto
grk-x-proto Proto-Hellenic Proto-Greek
ine-x-bsproto Proto-Balto-Slavic Error: unrecognized private tag: bsproto
moe Cree Innu
mul Translingual multiple
nds-de German Low German Low German
non-x-proto Proto-Norse Error: unrecognized private tag: proto
poz-x-polproto Proto-Nuclear Polynesian Error: unrecognized private tag: polproto
rw Rwanda-Rundi Kinyarwanda
tts Isan Northeastern Thai
xlu Luwian Cuneiform Luwian
zle-x-ort Old Ruthenian Error: unrecognized private tag: ort

Tracking categories

The above documentation is transcluded from Module:Wikt-lang/doc. (edit | history)
Editors can experiment in this module's sandbox (edit | diff) and testcases (edit | run) pages.
Add categories to the /doc subpage. Subpages of this module.
require('strict')
local m_data = mw.loadData("Module:Wikt-lang/data")
local langData = m_data.languages or m_data

local p = {}

-- Treat an empty string as "not supplied": returns nil for "", and the value
-- itself (unchanged) for anything else.
local function ifNotEmpty(value)
	if value ~= "" then
		return value
	end
	return nil
end

-- Build a piped wikilink to a language's article, followed by a colon and a
-- non-breaking space.
--   languageCode: key into langData
-- Returns the wikitext string "[[article|name]]:&nbsp;".
-- Raises an error when langData has no entry for the code.
--
-- NOTE(review): the table subscripts and the opening of the link were lost
-- from this function; the field names "article" and "Wikipedia_name" are
-- reconstructed from the local variable names and from the sibling field
-- "Wikipedia_code" -- confirm against the data module.
local function makeLinkedName(languageCode)
	local data = languageCode and langData[languageCode]
	if not data then
		error("No language data found for code " .. tostring(languageCode))
	end
	local article = data.article
	-- prefer the Wikipedia-specific name when the data module provides one
	local name = data.Wikipedia_name or data.name
	return "[[" .. article .. "|" .. name .. "]]:&nbsp;"
end

-- Convert display text into the Wiktionary entry (page) name for a language.
--   word:         the text to convert; must not be nil (it is passed through
--                 tostring, so non-string values are accepted)
--   languageCode: key into langData; may be nil or unknown, in which case
--                 only the bold/italic markup is stripped
-- Returns the entry name as a string ("" for an empty word).
-- Raises an error when word is nil.
--
-- NOTE(review): the subscripts in this function (langData[languageCode],
-- data.replacements, replacements.to[i]) were lost and are reconstructed
-- here -- confirm the "replacements" field name against the data module.
local function makeEntryName(word, languageCode)
	-- Validate before tostring(): tostring(nil) is the string "nil", so a
	-- check placed after the conversion can never fire.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end
	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis
	-- can be linked without piping. Bold (''') must go first, otherwise the
	-- italic pattern would eat two of its three apostrophes.
	word = word:gsub("'''", ""):gsub("''", "")

	local data = languageCode and langData[languageCode]
	local replacements = data and data.replacements
	if replacements == nil then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		for i, from in ipairs(replacements.from) do
			-- a missing "to" list (or entry) means "delete the match"
			word = ugsub(word, from, replacements.to and replacements.to[i] or "")
		end
	else
		-- plain pattern -> replacement map
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end

p.makeEntryName = makeEntryName

-- gsub callback that title-cases a script code: the first capture is
-- upper-cased, the second capture is lower-cased, and the two are joined.
local function fixScriptCode(firstLetter, threeLetters)
	local head = string.upper(firstLetter)
	local tail = string.lower(threeLetters)
	return head .. tail
end

-- Split the first template parameter into a language code and a script code.
--   codes: the raw code string, e.g. "la", "la-Latn", "nds-de", "ine-x-proto"
--   text:  the text being tagged; only used to guess the script when no
--          script code was supplied
-- Returns three values:
--   languageCode: the language code (nil when none could be extracted),
--                 after mapping through m_data.redirects
--   scriptCode:   a title-cased script code, or "Latn"/"unknown" as decided
--                 by Module:Unicode data when none was supplied
--   errorText:    "" on success, otherwise a small-print HTML error message
local function getCodes(codes, text)
	local languageCode, scriptCode, invalidCode
	local errorText

	-- Title-case a script subtag ("latn" -> "Latn"). The parentheses drop
	-- gsub's second return value (the substitution count). This replaces
	-- calls to a bare global `gsub`, which does not exist and raises under
	-- require('strict').
	local function normalizeScript(code)
		return (code:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
	end

	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'
	elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
		-- A two- or three-letter sequence at the beginning of the first
		-- parameter. Lower-casing the whole match keeps mixed-case input
		-- such as "laT" intact ("lat") instead of truncating it to "la".
		languageCode = codes:match("^%a%a%a?"):lower()
		-- Four letters at the end of the first parameter, if present.
		local rawScript = codes:match("(%a%a%a%a)$")
		if rawScript then
			scriptCode = normalizeScript(rawScript)
		end
	elseif codes:find("^%a%a%a?%-%a%a%a?$")
	or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
		-- language plus two/three-letter subtag(s), e.g. "nds-de": kept whole
		languageCode = codes

	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		-- NOTE(review): the guard above accepts a two-letter prefix but this
		-- match requires three letters, so "xx-x-foo" ends in the error
		-- branch below -- confirm whether that is intended.
		languageCode, scriptCode =
			codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = '<code>'..codes..'</code> is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
		else
			scriptCode = normalizeScript(scriptCode)
		end
	elseif codes:find("^%a%a%a?") then
		-- starts like a language code, but the remainder is unusable
		languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
		languageCode = string.lower(languageCode)
		errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
	elseif codes:find("%-?%a%a%a%a$") then
		-- ends like a script code, but the leading part is unusable; the lazy
		-- (.-) keeps the separating hyphen out of the reported invalid code
		invalidCode, scriptCode = codes:match("(.-)%-?(%a%a%a%a)$")
		scriptCode = normalizeScript(scriptCode)
		errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
	else
		errorText = '<code>'..codes..'</code> is not a valid language or script code.'
	end

	if not scriptCode or scriptCode == "" then
		-- no script given: guess from the text itself
		scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
	end

	if errorText then
		-- NOTE(review): the source showed an empty <span>, which discards the
		-- message; the bracketed interpolation is reconstructed -- confirm the
		-- exact wrapper against the live module.
		errorText = ' <span style="font-size: smaller">[' .. errorText .. ']</span>'
	else
		errorText = ""
	end

	-- map a redirecting code to its target, when a redirect table exists
	local redirects = m_data.redirects
	if languageCode and redirects then
		languageCode = redirects[languageCode] or languageCode
	end
	return languageCode, scriptCode, errorText
end

-- Wrap text in an HTML element carrying the language (and, for right-to-left
-- languages, direction) attributes.
--   text:         the content to wrap; nil/false is treated as ""
--   languageCode: language code used for the lang attribute and as the key
--                 into langData
--   script:       script code; italics are only applied for "Latn"
--   italics:      truthy to request italics
-- Returns the HTML string: <i lang=...> when italicized, <span lang=...>
-- otherwise; right-to-left text is additionally bracketed by &rlm; / &lrm;.
--
-- NOTE(review): the subscripts here were lost (a table was being
-- concatenated and compared with "rtl"). The field name "direction" and the
-- placement of the three markers are reconstructed -- confirm against the
-- data module and the live module.
local function tag(text, languageCode, script, italics)
	local data = languageCode and langData[languageCode]
	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	languageCode = data and data.Wikipedia_code or languageCode

	local italicize = script == "Latn" and italics

	text = text or ""

	-- attribute goes inside the opening tag; the two marks surround the element
	local dirAttribute, leadingMark, trailingMark = "", "", ""
	if data and data.direction == "rtl" then
		dirAttribute, leadingMark, trailingMark = ' dir="rtl"', '&rlm;', '&lrm;'
	end

	local element = italicize and "i" or "span"
	return leadingMark
		.. "<" .. element .. " lang=\"" .. languageCode .. "\"" .. dirAttribute .. ">"
		.. text
		.. "</" .. element .. ">"
		.. trailingMark
end



-- Entry point: language-tag a piece of text.
--   frame: the Scribunto frame object. Arguments are read from the parent
--          frame when it supplies a first positional parameter, otherwise
--          from the frame itself.
--   Parameter 1: language (and optional script) code
--   Parameter 2: the text (required)
--   italics / i / italic: "n", "-" or "no" turns italics off
-- Returns the tagged text followed by any error text from getCodes.
-- Raises an error when parameter 2 is missing.
--
-- NOTE(review): the positional subscripts (args[1], args[2]) were lost and
-- are reconstructed from the error message and from getCodes' comments
-- about the "first parameter".
function p.lang(frame)
	local parent = frame:getParent()
	-- guard against a missing parent frame before looking at its arguments
	local args = (parent and parent.args[1]) and parent.args or frame.args

	local codes = args[1] and mw.text.trim(args[1])
	local text = args[2] or error("Provide text in the second parameter")

	local languageCode, scriptCode, errorText = getCodes(codes, text)

	local italics = args.italics or args.i or args.italic
	-- italics are on unless explicitly switched off
	italics = not (italics == "n" or italics == "-" or italics == "no")

	return tag(text, languageCode, scriptCode, italics) .. errorText
end

-- Build a wikilink to a Wiktionary entry.
--   entry:        the entry (page) name; a leading "*" marks a reconstructed
--                 form
--   linkText:     the text to display
--   languageCode: optional; when given, the link targets the language's
--                 section of the entry, and reconstructed/appendix languages
--                 are linked in the Reconstruction:/Appendix: namespaces
-- Returns the wikitext link.
-- Raises an error when entry or linkText is missing, or when no language
-- name can be found for languageCode.
--
-- NOTE(review): the link literals and the page name in the first error
-- message were lost. The "[[wikt:entry#name|text]]" shape and the
-- Module:Wikt-lang/data reference are reconstructed -- confirm against the
-- live module.
local function linkToWiktionary(entry, linkText, languageCode)
	-- Validate first: the code below calls entry:sub(), which would fail with
	-- an unhelpful message on a nil entry before this check was reached.
	if not (entry and linkText) then
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end

	if not languageCode then
		return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
	end

	local data = langData[languageCode]
	local name
	if data and data.name then
		name = data.name
	else
		-- On other languages' wikis, use mw.getContentLanguage():getCode(),
		-- or replace 'en' with that wiki's language code.
		name = mw.language.fetchLanguageName(languageCode, 'en')
		if name == "" then
			error("Name for the language code " .. ("%q"):format(languageCode)
				.. " could not be retrieved with mw.language.fetchLanguageName, "
				.. "so it should be added to [[Module:Wikt-lang/data]]")
		end
	end

	-- Prefix a title with "<namespace>:<language name>/"; an empty language
	-- name cannot form a valid title.
	local function inNamespace(namespace, title)
		if name == "" then
			error("Language name is empty")
		end
		return namespace .. ":" .. name .. "/" .. title
	end

	if entry:sub(1, 1) == "*" then
		-- explicit reconstructed form: drop the asterisk from the title
		entry = inNamespace("Reconstruction", entry:sub(2))
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		local frame = mw.getCurrentFrame()
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template (a technique borrowed from Wiktionary).
		-- pcall keeps a failed expansion from aborting the link.
		pcall(frame.expandTemplate, frame,
			{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
		entry = inNamespace("Reconstruction", entry)
	elseif data and data.type == "appendix" then
		entry = inNamespace("Appendix", entry)
	end

	return "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]"
end

-- Entry point: link a word to Wiktionary and language-tag the link.
--   frame: the Scribunto frame object. Arguments are read from the parent
--          frame when it supplies a first positional parameter, otherwise
--          from the frame itself.
--   Parameter 1: language (and optional script) code
--   Parameter 2: the word to link (required; may be empty)
--   Parameter 3: optional display text
--   italics / i / italic: "n", "-" or "no" turns italics off
-- Returns the tagged link followed by any error text from getCodes.
-- Raises an error when parameter 2 is absent.
--
-- NOTE(review): the positional subscripts (args[1..3]) and the placeholder
-- text inside the fallback <span> were lost; "[text?]" is reconstructed --
-- confirm against the live module.
function p.wiktlang(frame)
	local parent = frame:getParent()
	-- guard against a missing parent frame before looking at its arguments
	local args = (parent and parent.args[1]) and parent.args or frame.args

	local codes = args[1] and mw.text.trim(args[1])
	local word1 = ifNotEmpty(args[2])
	local word2 = ifNotEmpty(args[3])

	-- an empty parameter 2 is tolerated (see the placeholder below); an
	-- absent one is not
	if not args[2] then
		error("Parameter 2 is required")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, word2 or word1)

	local italics = args.italics or args.i or args.italic
	-- italics are on unless explicitly switched off
	italics = not (italics == "n" or italics == "-" or italics == "no")

	local out
	if word1 then
		-- the entry name always derives from parameter 2; parameter 3, when
		-- given, only changes what is displayed
		local entry = makeEntryName(word1, languageCode)
		local linkText = word2 or word1
		if languageCode then
			out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics)
		else
			out = linkToWiktionary(entry, linkText)
		end
	else
		-- parameter 2 was present but empty
		out = '<span style="font-size: smaller;">[text?]</span>'
	end

	-- getCodes always returns a string for errorText ("" when there is none)
	return out .. errorText
end

-- Entry point: link a word to Wiktionary without language-tagging it.
--   frame: the Scribunto frame object. Arguments are read from the parent
--          frame when it supplies a first positional parameter, otherwise
--          from the frame itself.
--   Parameter 1: language (and optional script) code
--   Parameter 2: the word to link (required, non-empty)
--   Parameter 3: optional display text
-- Returns the link followed by any error text from getCodes.
-- Raises an error when parameter 2 is missing or empty.
--
-- NOTE(review): the positional subscripts (args[1..3]) were lost and are
-- reconstructed from the error message and the parallel p.wiktlang.
function p.wikt(frame)
	local parent = frame:getParent()
	-- guard against a missing parent frame before looking at its arguments
	local args = (parent and parent.args[1]) and parent.args or frame.args

	local codes = args[1] and mw.text.trim(args[1])
	local word1 = ifNotEmpty(args[2])
	local word2 = ifNotEmpty(args[3])

	if not word1 then
		error("Provide a word in parameter 2.")
	end

	-- the script code is not needed here because nothing is tagged
	local languageCode, _, errorText = getCodes(codes, word1)

	-- the entry name always derives from parameter 2; parameter 3, when
	-- given, only changes what is displayed
	local entry = makeEntryName(word1, languageCode)
	local linkText = word2 or word1

	-- a nil languageCode makes linkToWiktionary produce a plain,
	-- section-less link
	local out = linkToWiktionary(entry, linkText, languageCode)

	-- getCodes always returns a string for errorText ("" when there is none)
	return out .. errorText
end

return p
Category: