< Module:Wikt-lang
Browse history interactively← Previous editNext edit →
Revision as of 17:42, 17 July 2024
Module documentation[view] [edit] [history] [purge]
| This module is rated as beta, and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected. |
Language templates |
---|
Language names (ISO 639) |
---|
|
Interwiki links |
---|
|
Foreign-language text |
---|
|
Other |
---|
|
|
| This module depends on the following other modules: |
This module is used by {{Wikt-lang}}. It is inspired by the templates {{m}} and {{l}} and their associated modules on Wiktionary. It has a Wiktionary link function that links to the correct section of the Wiktionary entry and applies the correct language formatting and italics. The language-tagging function does most of what {{Lang}} does, except that italics can't be customized and categories aren't added.
The module uses Module:Wikt-lang/data to retrieve the language name for a language code, and to perform the necessary entry-name replacements (for instance, removing macrons from Latin entry names). These are unfortunately not automatically retrieved from Wiktionary's Wikt-lang data modules. For language codes that do not have a name value in this module, the language name is fetched with mw.language.fetchLanguageName. When mw.language.fetchLanguageName does not fetch the correct language name (or any language name at all), please add it to Module:Wikt-lang/data; similarly, when the correct entry name is not generated, please add the entry-name replacements to the module.
Examples
Invalid codes
Errors
Comparison of codes
Language code
|
Wiktionary name
|
English Wikipedia name
|
aaq
|
Penobscot
|
Eastern Abnaki
|
abe
|
Abenaki
|
Western Abnaki
|
ajp
|
South Levantine Arabic
|
South Levantine Arabic
|
apc
|
North Levantine Arabic
|
Levantine Arabic
|
arb
|
Modern Standard Arabic
|
Standard Arabic
|
cel-x-bryproto
|
Proto-Brythonic
|
Error: unrecognized private tag: bryproto
|
cu
|
Old Church Slavonic
|
Church Slavonic
|
egy
|
Egyptian
|
Ancient Egyptian
|
frp
|
Franco-Provençal
|
Arpitan
|
gmw-x-proto
|
Proto-West Germanic
|
Error: unrecognized private tag: proto
|
grk-x-proto
|
Proto-Hellenic
|
Proto-Greek
|
ine-x-bsproto
|
Proto-Balto-Slavic
|
Error: unrecognized private tag: bsproto
|
moe
|
Cree
|
Innu
|
mul
|
Translingual
|
multiple
|
nds-de
|
German Low German
|
Low German
|
non-x-proto
|
Proto-Norse
|
Error: unrecognized private tag: proto
|
poz-x-polproto
|
Proto-Nuclear Polynesian
|
Error: unrecognized private tag: polproto
|
rw
|
Rwanda-Rundi
|
Kinyarwanda
|
tts
|
Isan
|
Northeastern Thai
|
xlu
|
Luwian
|
Cuneiform Luwian
|
zle-x-ort
|
Old Ruthenian
|
Error: unrecognized private tag: ort
|
Tracking categories
The above documentation is transcluded from Module:Wikt-lang/doc. (edit | history)
Editors can experiment in this module's sandbox (edit | diff) and testcases (edit | run) pages.
Add categories to the /doc subpage. Subpages of this module.
require('strict')
local m_data = mw.loadData("Module:Language/data")
local langData = m_data.languages or m_data
local p = {}
--- Normalise an empty template argument to nil.
-- @param value  any value, typically a template argument
-- @return nil when `value` is the empty string, otherwise `value` unchanged
local function ifNotEmpty(value)
	if value ~= "" then
		return value
	end
	return nil
end
-- Intended (judging by the name) to build a linked language name for a
-- language code; not called anywhere in the visible part of this file.
-- NOTE(review): as written, the function ignores `languageCode` and always
-- returns the literal string "]: ". `data`, `article` and `name` all alias the
-- whole `langData` table and are never read. The body looks like it lost its
-- table-index expressions and its link markup (presumably a per-language
-- lookup followed by a wikilink to the language's article) -- TODO confirm
-- against the upstream module before relying on this function.
local function makeLinkedName(languageCode)
local data = langData
local article = data
local name = data or data
return "]: "
end
--- Convert a displayed word into the Wiktionary entry (page) name for a language.
-- Strips wiki bold/italic quote markup, then applies the language's entry-name
-- replacements (for instance, removing diacritics that Wiktionary omits from
-- page titles).
-- @param word          the word as displayed; converted with tostring()
-- @param languageCode  language code used to look up per-language data (may be nil)
-- @return the entry name; "" when `word` is the empty string
-- Raises an error when `word` is nil.
local function makeEntryName(word, languageCode)
	-- Validate before tostring(): tostring(nil) is the string "nil", so checking
	-- afterwards (as the previous code did) could never detect a missing word.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end
	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis
	-- can be linked without piping. The parentheses drop gsub's second result.
	word = (word:gsub("'''", ""))
	word = (word:gsub("''", ""))

	-- Per-language record; the language code was previously accepted but never
	-- used, so the whole data table was treated as one language's record.
	local data = languageCode and langData[languageCode]
	-- NOTE(review): the field name `replacements` is assumed (the data module is
	-- not visible from here) -- confirm against Module:Language/data.
	local replacements = data and data.replacements
	if not replacements then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		for i, from in ipairs(replacements.from) do
			-- Pair each `from` pattern with the `to` entry at the same position;
			-- a missing `to` (list or entry) means "delete the match".
			word = ugsub(word, from, replacements.to and replacements.to[i] or "")
		end
	else
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end
p.makeEntryName = makeEntryName
--- Put a four-letter script code into title case (e.g. "l", "ATN" -> "Latn").
-- Used as a gsub replacement callback, receiving the two captures.
-- @param firstLetter   first letter of the script code
-- @param threeLetters  remaining three letters of the script code
-- @return the upper-cased first letter followed by the lower-cased rest
local function fixScriptCode(firstLetter, threeLetters)
	local head = string.upper(firstLetter)
	local tail = string.lower(threeLetters)
	return head .. tail
end
--- Split the first template parameter into a language code and a script code.
-- Accepted shapes: "xx"/"xxx", "xx-Ssss"/"xxx-Ssss", "xx-yy"-style compound
-- codes, "xxx-yyy-zzz", and private-use codes such as "ine-x-proto".
-- @param codes  the raw code string (may be nil or "")
-- @param text   the text being tagged; used to guess the script when no
--               script code was supplied
-- @return languageCode  string, or nil when no language code could be parsed
-- @return scriptCode    string; "Latn" or "unknown" when guessed from `text`
-- @return errorText     "" on success, otherwise an HTML fragment with the message
local function getCodes(codes, text)
	local languageCode, scriptCode, invalidCode
	local errorText
	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'
	elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
		-- A two- or three-letter sequence at the beginning of the first
		-- parameter. Lower-case the whole match: the previous two-step match
		-- accepted only the leading lower-case letters, so "enG" became "en".
		languageCode = string.lower(codes:match("^%a%a%a?"))
		-- Four letters after a hyphen at the end of the first parameter,
		-- normalised to one upper-case letter plus three lower-case letters.
		-- (This path used to call an undefined global `gsub`.)
		local rawScript = codes:match("%-(%a%a%a%a)$")
		if rawScript then
			scriptCode = fixScriptCode(rawScript:sub(1, 1), rawScript:sub(2))
		end
	elseif codes:find("^%a%a%a?%-%a%a%a?$")
	or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
		languageCode = codes
	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		-- The check above is anchored at the end, so the trailing script
		-- capture is always empty here; a two-letter prefix fails this match
		-- and is reported as invalid (behaviour kept as before).
		languageCode, scriptCode =
			codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = '<code>'..codes..'</code> is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
		else
			-- Parentheses keep only the substituted string, not gsub's count.
			scriptCode = (scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
		end
	elseif codes:find("^%a%a%a?") then
		languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
		languageCode = string.lower(languageCode)
		errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
	elseif codes:find("%-?%a%a%a%a$") then
		-- Lazy first capture so the separating hyphen is not reported as part
		-- of the invalid language code.
		invalidCode, scriptCode = codes:match("(.-)%-?(%a%a%a%a)$")
		-- string.gsub, not the undefined global `gsub` used previously.
		scriptCode = (string.gsub(scriptCode, "(%a)(%a%a%a)", fixScriptCode))
		errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
	else
		errorText = '<code>'..codes..'</code> is not a valid language or script code.'
	end

	if not scriptCode or scriptCode == "" then
		scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
	end

	if errorText then
		-- Put the message inside the span; the previous code built the message
		-- and then replaced it with an empty span, so nothing was ever shown.
		errorText = ' <span style="font-size: smaller">[' .. errorText .. ']</span>'
	else
		errorText = ""
	end

	-- Resolve a redirected code to its target. The table must be indexed by the
	-- code: assigning `m_data.redirects` itself would turn languageCode into a table.
	local redirects = m_data.redirects
	if redirects and languageCode then
		languageCode = redirects[languageCode] or languageCode
	end
	return languageCode, scriptCode, errorText
end
--local function tag(text, languageCode, script, italics)
--- Wrap text in an HTML element carrying the language (and direction) attributes.
-- @param text          the text or wikitext to wrap; nil is treated as ""
-- @param languageCode  language code for the `lang` attribute
-- @param script        script code (currently unused; kept for the call signature)
-- @param italicize     truthy for <i>, falsy for <span>
-- @return the tagged text; for right-to-left languages the element gets
--         dir="rtl" and is surrounded by directional marks
local function tag(text, languageCode, script, italicize)
	-- Per-language record (previously the whole data table was used here).
	local data = languageCode and langData[languageCode]

	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	languageCode = data and data.Wikipedia_code or languageCode

	if not text then text = "" end

	local dirAttribute, leadingMark, trailingMark = "", "", ""
	-- NOTE(review): the field name `direction` is assumed (the previous code
	-- compared the whole record to "rtl", which can never be true) -- confirm
	-- against the data module.
	if data and data.direction == "rtl" then
		dirAttribute = ' dir="rtl"'
		leadingMark = "\226\128\143"  -- U+200F RIGHT-TO-LEFT MARK
		trailingMark = "\226\128\142" -- U+200E LEFT-TO-RIGHT MARK
	end

	-- Pieces are named individually: concatenating the marker table itself
	-- (as before) raises "attempt to concatenate a table value".
	local element = italicize and "i" or "span"
	return leadingMark
		.. "<" .. element .. " lang=\"" .. languageCode .. "\"" .. dirAttribute .. ">"
		.. text
		.. "</" .. element .. ">"
		.. trailingMark
end
--[[-------------------------< I T A L I C S _ S E T >--------------------------------------------------------

Decide whether <text> ({{{2}}}) is rendered in an italic or an upright font face.

Italic is the default. Text is rendered upright when |italics= is no / n / -, when the script is not Latn, or
when the language is English (English words are only italicized for 'words-as-words'; see MOS:FOREIGN and
MOS:WORDSASWORDS). |italics=yes / y / + overrides all of these and forces italics.

returns boolean:
	true: output rendered in italic font face
	false: output rendered in upright font face

]]

local function italics_set (languageCode, scriptCode, italics)
	-- an explicit request for italics wins over every reason for upright text
	if italics == 'yes' or italics == 'y' or italics == '+' then
		return true
	end

	local switched_off = italics == 'no' or italics == 'n' or italics == '-'
	if switched_off or scriptCode ~= 'Latn' or languageCode == 'en' then
		return false
	end

	return true
end
--- Entry point: tag text with its language (no Wiktionary link).
-- Parameter 1 is the language (and optional script) code, parameter 2 the text.
-- |italics= / |i= / |italic= control the font face (see italics_set).
-- @param frame  the Scribunto frame object
-- @return the tagged text followed by any error text from getCodes
-- Raises an error when parameter 2 is missing.
function p.lang(frame)
	local parent = frame:getParent()
	-- Prefer the calling template's arguments when it was given a first
	-- parameter, otherwise use the #invoke arguments; also covers a missing
	-- parent frame.
	-- NOTE(review): the "[1]" test is reconstructed -- the previous expression
	-- (`parent.args and parent.args or frame.args`) could never fall back.
	local args = parent and parent.args[1] and parent.args or frame.args

	-- Index the positional parameters; the previous code passed the whole
	-- args table to mw.text.trim and used it as the text.
	local codes = args[1] and mw.text.trim(args[1])
	local text = args[2] or error("Provide text in the second parameter")

	local languageCode, scriptCode, errorText = getCodes(codes, text)

	if not languageCode then
		-- No usable language code: show the untagged text with the error
		-- message instead of failing inside tag() on a nil code.
		return text .. errorText
	end

	local italics = args.italics or args.i or args.italic
	italics = italics_set(languageCode, scriptCode, italics)

	return tag(text, languageCode, scriptCode, italics) .. errorText
end
--- Build a wikilink to a Wiktionary entry.
-- With a language code the link targets the language's section of the entry;
-- reconstructed and appendix-only languages are linked into the
-- "Reconstruction:" and "Appendix:" namespaces respectively.
-- @param entry         Wiktionary page name; a leading "*" marks a reconstructed form
-- @param linkText      text displayed for the link
-- @param languageCode  optional language code
-- @return wikitext for the link
-- Raises an error when entry or linkText is missing, when no language name can
-- be found for the code, or when a namespaced entry has an empty language name.
local function linkToWiktionary(entry, linkText, languageCode)
	-- Validate first: the previous code only checked after entry:sub() had
	-- already been called, so a nil entry failed with an unrelated message.
	if not (entry and linkText) then
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end

	-- NOTE(review): the link markup below ("[[wikt:...]]") is reconstructed;
	-- the previous return statements contained only "]" -- confirm the
	-- interwiki prefix against the upstream module.
	if not languageCode then
		return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
	end

	local data = langData[languageCode]
	local name = data and data.name
	if not name then
		-- On other languages' wikis, use mw.getContentLanguage():getCode(),
		-- or replace 'en' with that wiki's language code.
		name = mw.language.fetchLanguageName(languageCode, 'en')
		if name == "" then
			-- NOTE(review): the page named here is the data module this file
			-- loads; the previous message ended in a dangling "]".
			error("Name for the language code " .. ("%q"):format(languageCode)
				.. " could not be retrieved with mw.language.fetchLanguageName, "
				.. "so it should be added to [[Module:Language/data]]")
		end
	end

	-- Work out whether the entry lives outside the main namespace.
	local namespace
	if entry:sub(1, 1) == "*" then
		namespace = "Reconstruction"
		entry = entry:sub(2)
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		local frame = mw.getCurrentFrame()
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template. pcall keeps a failed expansion from
		-- aborting the link.
		pcall(frame.expandTemplate, frame,
			{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
		namespace = "Reconstruction"
	elseif data and data.type == "appendix" then
		namespace = "Appendix"
	end

	if namespace then
		if name == "" then
			error("Language name is empty")
		end
		entry = namespace .. ":" .. name .. "/" .. entry
	end

	-- The fragment is the language name, i.e. the language's section heading
	-- on the Wiktionary page.
	return "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]"
end
--- Entry point: link a word to Wiktionary and tag it with its language.
-- Parameter 1 is the language (and optional script) code, parameter 2 the
-- word to link, parameter 3 optional display text. |italics= / |i= / |italic=
-- control the font face (see italics_set).
-- @param frame  the Scribunto frame object
-- @return the tagged link followed by any error text from getCodes
-- Raises an error when parameter 2 is absent.
function p.wiktlang(frame)
	local parent = frame:getParent()
	-- Prefer the calling template's arguments when it was given a first
	-- parameter, otherwise use the #invoke arguments; also covers a missing
	-- parent frame.
	-- NOTE(review): the "[1]" test is reconstructed -- the previous expression
	-- (`parent.args and parent.args or frame.args`) could never fall back.
	local args = parent and parent.args[1] and parent.args or frame.args

	-- Index the positional parameters; the previous code passed the whole
	-- args table to mw.text.trim and ifNotEmpty.
	local codes = args[1] and mw.text.trim(args[1])
	if not args[2] then
		error("Parameter 2 is required")
	end
	local word1 = ifNotEmpty(args[2])
	local word2 = ifNotEmpty(args[3])

	local languageCode, scriptCode, errorText = getCodes(codes, word2 or word1)

	local italics = args.italics or args.i or args.italic
	italics = italics_set(languageCode, scriptCode, italics)

	local out
	if word1 then
		-- The entry name always comes from parameter 2; parameter 3, when
		-- given, only changes the displayed text.
		local entry = makeEntryName(word1, languageCode)
		local linkText = word2 or word1
		if languageCode then
			out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics)
		else
			out = linkToWiktionary(entry, linkText)
		end
	else
		-- Parameter 2 was present but empty.
		-- NOTE(review): this span is empty; it presumably once held a short
		-- message -- confirm against the upstream module.
		out = '<span style="font-size: smaller;"></span>'
	end

	return out .. (errorText or "")
end
--- Entry point: link a word to Wiktionary without language tagging.
-- Parameter 1 is the language (and optional script) code, parameter 2 the
-- word to link, parameter 3 optional display text.
-- @param frame  the Scribunto frame object
-- @return the link followed by any error text from getCodes
-- Raises an error when parameter 2 is missing or empty.
function p.wikt(frame)
	local parent = frame:getParent()
	-- Prefer the calling template's arguments when it was given a first
	-- parameter, otherwise use the #invoke arguments; also covers a missing
	-- parent frame.
	-- NOTE(review): the "[1]" test is reconstructed -- the previous expression
	-- (`parent.args and parent.args or frame.args`) could never fall back.
	local args = parent and parent.args[1] and parent.args or frame.args

	-- Index the positional parameters; the previous code passed the whole
	-- args table to mw.text.trim and ifNotEmpty.
	local codes = args[1] and mw.text.trim(args[1])
	local word1 = ifNotEmpty(args[2])
	local word2 = ifNotEmpty(args[3])
	if not word1 then
		error("Provide a word in parameter 2.")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, word1)

	-- word1 is guaranteed from here on, so the entry and link text always
	-- exist; linkToWiktionary handles a nil languageCode by producing a plain
	-- link, which covers both branches the previous code spelled out.
	local entry = makeEntryName(word1, languageCode)
	local linkText = word2 or word1
	local out = linkToWiktionary(entry, linkText, languageCode)

	return out .. (errorText or "")
end
return p
Category: