Revision as of 19:30, 27 June 2024 editTrappist the monk (talk | contribs)Administrators479,568 editsNo edit summary← Previous edit |
Latest revision as of 09:55, 16 December 2024 edit undoGonnym (talk | contribs)Autopatrolled, Extended confirmed users, Template editors222,872 editsNo edit summary |
(79 intermediate revisions by 2 users not shown) |
Line 1: |
Line 1: |
|
require('strict') |
|
require('strict') |
|
|
|
|
local m_data = mw.loadData("Module:Language/data") |
|
|
|
local m_data = mw.loadData("Module:Wikt-lang/data/sandbox") |
|
local langData = m_data.languages or m_data |
|
|
|
local lang_data = m_data.languages or m_data |
|
|
|
|
|
local p = {} |
|
local p = {} |
|
|
|
|
|
local function ifNotEmpty(value) |
|
local error_msgs = { |
|
|
internal = { |
|
if value == "" then |
|
|
|
= "The function get_clean_Wiktionary_page_name requires a page_name.", |
|
return nil |
|
|
|
= "Name for the language code %q could not be retrieved. Add it to ].", |
|
else |
|
|
|
= "The function make_Wiktionary_link needs a Wiktionary page name, display text and language code.", |
|
return value |
|
|
end |
|
}, |
|
|
= "A Wiktionary entry is required.", |
|
end |
|
|
|
= 'Invalid parameter: |%s=', |
|
|
} |
|
|
|
|
|
|
local cfg = { |
|
local function makeLinkedName(languageCode) |
|
|
|
template = "Wikt-lang", |
|
local data = langData |
|
|
|
valid_parameters = { |
|
local article = data |
|
|
|
= true, |
|
local name = data or data |
|
|
|
= true, |
|
return "]: " |
|
|
|
= true, |
|
|
= true, |
|
|
}, |
|
|
error_msg = '<span style="color: #d33;">Error: {{%s}}: %s</span>]', |
|
|
category = "Wikt-lang template errors", |
|
|
namespace = { |
|
|
appendix = { |
|
|
name = "Appendix:%s/%s", |
|
|
data_type = "appendix", |
|
|
}, |
|
|
reconstruction = { |
|
|
name = "Reconstruction:%s/%s", |
|
|
data_type = "reconstructed", |
|
|
}, |
|
|
}, |
|
|
} |
|
|
|
|
|
--[[--------------------------< M A K E _ E R R O R >-------------------------------------------------- |
|
|
|
|
|
Creates an error span with the supplied error message and attaches the error category. |
|
|
|
|
|
]] |
|
|
-- Builds the error markup for this template: the configured error format is
-- filled with the template name, the supplied message and the error category.
-- @param msg  message text to embed in the error output
-- @return     formatted error string
local function make_error(msg)
	local template_name, error_category = cfg.template, cfg.category
	return cfg.error_msg:format(template_name, msg, error_category)
end
|
end |
|
|
|
|
|
|
--[[--------------------------< A D D _ N A M E S P A C E _ T O _ L I N K >-------------------------------------------------- |
|
local function makeEntryName(word, languageCode) |
|
|
|
|
|
local data = langData |
|
|
|
Returns the page_name with a prefix of a Wiktionary namespace, if relevant. |
|
local ugsub = mw.ustring.gsub |
|
|
|
Current namespaces that can be returned: "Appendix:" and "Reconstruction:". |
|
word = tostring(word) |
|
|
|
If not one of the above namespaces, returns the unaltered page_name. |
|
if word == nil then |
|
|
|
|
|
error("The function makeEntryName requires a string argument") |
|
|
|
]] |
|
elseif word == "" then |
|
|
|
local function add_namespace_to_link(data, language_name, page_name) |
|
return "" |
|
|
|
if page_name:sub(1, 1) == "*" then |
|
else |
|
|
|
return string.format(cfg.namespace.reconstruction.name, language_name, page_name:sub(2)) |
|
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping. |
|
|
|
end |
|
word = word:gsub("\'\'\'", "") |
|
|
|
|
|
word = word:gsub("\'\'", "") |
|
|
if data == nil then |
|
if data and data.type then |
|
|
if data.type == cfg.namespace.reconstruction.data_type then |
|
return word |
|
|
|
return string.format(cfg.namespace.reconstruction.name, language_name, page_name) |
|
else |
|
|
|
elseif data.type == cfg.namespace.appendix.data_type then |
|
local replacements = data and data |
|
|
|
return string.format(cfg.namespace.appendix.name, language_name, page_name) |
|
if replacements == nil then |
|
|
return word |
|
|
else |
|
|
-- Decompose so that the diacritics of characters such |
|
|
-- as á can be removed in one go. |
|
|
-- No need to compose at the end, because the MediaWiki software |
|
|
-- will handle that. |
|
|
if replacements.decompose then |
|
|
word = mw.ustring.toNFD(word) |
|
|
for i, from in ipairs(replacements.from) do |
|
|
word = ugsub( |
|
|
word, |
|
|
from, |
|
|
replacements.to and replacements.to or "") |
|
|
end |
|
|
else |
|
|
for regex, replacement in pairs(replacements) do |
|
|
word = ugsub(word, regex, replacement) |
|
|
end |
|
|
end |
|
|
return word |
|
|
end |
|
|
end |
|
end |
|
end |
|
end |
|
|
|
|
|
-- If for any reason this reaches here, return the unaltered page_name. |
|
|
return page_name |
|
end |
|
end |
|
|
|
|
|
|
--[[--------------------------< G E T _ L A N G U A G E _ N A M E >-------------------------------------------------- |
|
p.makeEntryName = makeEntryName |
|
|
|
|
|
|
|
Retrieves the language name. |
|
local function fixScriptCode(firstLetter, threeLetters) |
|
|
|
A language is first looked up in Module:Wikt-lang/data; if it is found and has a language name set, that name is returned. |
|
return string.upper(firstLetter) .. string.lower(threeLetters) |
|
|
|
That database is used to override the language names produced by Module:Lang/data. |
|
|
If no language is found or the language does not have a language name set, returns the language name from Module:Lang/data. |
|
|
|
|
|
]] |
|
|
-- Picks the language name to use for a Wiktionary link.
-- @param data           language entry table, or nil when there is none
-- @param language_name  fallback name supplied by the caller
-- @return data.Wiktionary_name when data exists and that field is truthy,
--         otherwise language_name unchanged
local function get_Wiktionary_language_name(data, language_name)
	local override = data and data.Wiktionary_name
	return override or language_name
end
|
end |
|
|
|
|
|
|
--[[--------------------------< M A K E _ W I K T I O N A R Y _ L I N K >-------------------------------------------------- |
|
local function getCodes(codes, text) |
|
|
local languageCode, scriptCode, invalidCode |
|
|
local errorText |
|
|
if codes == nil or codes == "" then |
|
|
errorText = 'no language or script code provided' |
|
|
elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then |
|
|
-- A three- or two-letter lowercase sequence at beginning of first parameter |
|
|
languageCode = |
|
|
codes:find("^%a%a%a?") and ( |
|
|
codes:match("^(%l%l%l?)") |
|
|
or codes:match("^(%a%a%a?)") |
|
|
:gsub("(%a%a%a?)", string.lower, 1) |
|
|
) |
|
|
-- One uppercase and three lowercase letters at the end of the first parameter |
|
|
scriptCode = |
|
|
codes:find("%a%a%a%a$") and ( |
|
|
codes:match("(%u%l%l%l)$") |
|
|
or gsub( |
|
|
codes:match("(%a%a%a%a)$"), |
|
|
"(%a)(%a%a%a)", |
|
|
fixScriptCode, |
|
|
1 |
|
|
) |
|
|
) |
|
|
elseif codes:find("^%a%a%a?%-%a%a%a?$") |
|
|
or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then |
|
|
languageCode = codes |
|
|
|
|
|
|
|
Creates a Wiktionary link. |
|
-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase |
|
|
|
A page_name, display_text and language_code are always needed. Error if they are missing. |
|
-- letters separated by hyphens. This only allows for one sequence, as it is |
|
|
|
|
|
-- needed for proto-languages such as ine-x-proto (Proto-Indo-European). |
|
|
|
A language name can sometimes be nil if the private code is only listed at Module:Wikt-lang/data and not on Module:Lang/data. |
|
elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then |
|
|
|
If a language name cannot be retrieved, an error is returned. |
|
languageCode, scriptCode = |
|
|
|
|
|
codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$") |
|
|
|
]] |
|
if not languageCode then |
|
|
|
local function make_Wiktionary_link(page_name, display_text, language_code, language_name) |
|
errorText = '<code>'..codes..'</code> is not a valid language or script code.' |
|
|
|
if not page_name and not display_text and not language_code then |
|
elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then |
|
|
|
return nil, make_error(error_msgs.internal.make_Wiktionary_link) |
|
errorText = '<code>'..scriptCode..'</code> is not a valid script code.' |
|
|
else |
|
|
scriptCode = scriptCode:gsub( |
|
|
"(%a)(%a%a%a)", |
|
|
fixScriptCode, |
|
|
1 |
|
|
) |
|
|
end |
|
|
elseif codes:find("^%a%a%a?") then |
|
|
languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)") |
|
|
languageCode = string.lower(languageCode) |
|
|
errorText = '<code>'..invalidCode..'</code> is not a valid script code.' |
|
|
elseif codes:find("%-?%a%a%a%a$") then |
|
|
invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$") |
|
|
scriptCode = gsub( |
|
|
scriptCode, |
|
|
"(%a)(%a%a%a)", |
|
|
fixScriptCode |
|
|
) |
|
|
errorText = '<code>'..invalidCode..'</code> is not a valid language code.' |
|
|
else |
|
|
errorText = '<code>'..codes..'</code> is not a valid language or script code.' |
|
|
end |
|
end |
|
|
|
|
if not scriptCode or scriptCode == "" then |
|
|
|
local data = lang_data |
|
scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown" |
|
|
|
language_name = get_Wiktionary_language_name(data, language_name) |
|
|
if not language_name then |
|
|
return make_error(error_msgs.language_code_missing) |
|
end |
|
end |
|
|
|
|
if errorText then |
|
|
|
page_name = add_namespace_to_link(data, language_name, page_name) |
|
errorText = ' <span style="font-size: smaller"></span>' |
|
|
|
|
|
else |
|
|
|
local link = "]" |
|
errorText = "" |
|
|
|
return string.format(link, page_name, language_name, display_text) |
|
end |
|
|
languageCode = m_data.redirects or languageCode |
|
|
return languageCode, scriptCode, errorText |
|
|
end |
|
end |
|
|
|
|
|
|
--[[--------------------------< R E P L A C E _ C H A R A C T E R S >-------------------------------------------------- |
|
--local function tag(text, languageCode, script, italics) |
|
|
|
|
|
local function tag(text, languageCode, script, italicize) |
|
|
|
Replaces specific characters as defined in Module:Wikt-lang/data in a language's "replacement" value. |
|
local data = langData |
|
|
|
|
|
-- Use Wikipedia code if it has been given: for instance, |
|
|
|
]] |
|
-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia |
|
|
|
local function replace_characters(data, text) |
|
-- code "ine-x-proto". |
|
|
languageCode = data and data.Wikipedia_code or languageCode |
|
local replacements = data and data |
|
|
if replacements == nil then |
|
|
|
|
|
-- No replacements needed; use text as is. |
|
-- local italicize = script == "Latn" and italics |
|
|
|
return text |
|
|
|
|
if not text then text = "" end |
|
|
|
|
|
local textDirectionMarkers = { "", "", "" } |
|
|
if data and data == "rtl" then |
|
|
textDirectionMarkers = { ' dir="rtl"', '‏', '‎' } |
|
|
end |
|
end |
|
|
|
|
|
|
|
|
-- Decompose so that the diacritics of characters such |
|
local out = { textDirectionMarkers } |
|
|
|
-- as á can be removed in one go. |
|
if italicize then |
|
|
|
-- No need to compose at the end, because the MediaWiki software |
|
table.insert(out, "<i lang=\"" .. languageCode .. "\"" .. textDirectionMarkers .. ">" .. text .. "</i>") |
|
|
|
-- will handle that. |
|
else |
|
|
|
if replacements.decompose then |
|
table.insert(out, "<span lang=\"" .. languageCode .. "\"" .. textDirectionMarkers .. ">" .. text .. "</span>") |
|
|
|
text = mw.ustring.toNFD(text) |
|
|
for i, from in ipairs(replacements.from) do |
|
|
text = mw.ustring.gsub(text, from, replacements.to and replacements.to or "") |
|
|
end |
|
|
return text |
|
end |
|
end |
|
|
|
|
table.insert(out, textDirectionMarkers) |
|
|
|
for regex, replacement in pairs(replacements) do |
|
|
|
|
|
text = mw.ustring.gsub(text, regex, replacement) |
|
return table.concat(out) |
|
|
|
end |
|
|
return text |
|
|
|
|
end |
|
end |
|
|
|
|
|
|
--[[--------------------------< R E M O V E _ B O L D _ I T A L I C >-------------------------------------------------- |
|
|
|
|
|
|
Removes bold and italics, so that words that contain bolding or emphasis can be linked without piping. |
|
--[[-------------------------< I T A L I C S _ S E T >-------------------------------------------------------- |
|
|
|
|
|
|
|
]] |
|
common function to determine whether <text> ({{{2}}}) renders in italic or upright font face. |
|
|
|
-- Strips wiki bold (''') and italic ('') markup from text.
-- @param text  string to clean; a nil/false value is handed back untouched
-- @return      the text with every ''' and then every '' sequence removed
local function remove_bold_italic(text)
	if text then
		-- Triple apostrophes must go first so that five-apostrophe bold-italic
		-- markers are consumed completely by the two passes.
		local stripped = text:gsub("\'\'\'", "")
		stripped = stripped:gsub("\'\'", "")
		return stripped
	end
	return text
end
|
|
|
|
|
|
--[[--------------------------< G E T _ C L E A N _ W I K T I O N A R Y _ P A G E _ N A M E >-------------------------------------------------- |
|
this function created to deal with the complaint that English words should not be italicized except when used |
|
|
for 'words-as-words' (MOS:FOREIGN and MOS:WORDSASWORDS). new with this function is support for |italics=yes to |
|
|
override the upright rendering of English text. |
|
|
|
|
|
|
|
Returns a clean Wiktionary page name by removing bold and italics, and by replacing specific characters as defined in Module:Wikt-lang/data. |
|
returns boolean: |
|
|
true: output rendered in italic font face |
|
|
false: output rendered in upright font face |
|
|
|
|
|
|
]] |
|
]] |
|
|
local function get_clean_Wiktionary_page_name(page_name, language_code) |
|
|
page_name = tostring(page_name) |
|
|
if page_name == nil or page_name == "" then |
|
|
return nil, make_error(error_msgs.internal.get_clean_Wiktionary_page_name) |
|
|
end |
|
|
|
|
|
|
page_name = remove_bold_italic(page_name) |
|
local function italics_set (languageCode, scriptCode, italics) |
|
|
local italicize = true; -- assume that italics is default |
|
|
|
|
|
|
|
local data = lang_data |
|
if 'no' == italics or 'n' == italics or '-' == italics or -- italics specifically turned off or |
|
|
|
if data == nil then |
|
'Latn' ~= scriptCode or -- non-Latn script or |
|
|
|
-- No language specific data in module; use text as is. |
|
'en' == languageCode then -- English language |
|
|
|
return page_name |
|
italicize = false; -- no italics unless overridden by |italics=yes |
|
|
end |
|
end |
|
|
|
|
|
|
return replace_characters(data, page_name) |
|
if 'yes' == italics or 'y' == italics or '+' == italics then -- force italics |
|
|
italicize = true; |
|
|
end |
|
|
|
|
|
return italicize; |
|
|
end |
|
end |
|
|
|
|
|
|
--[[--------------------------< C H E C K _ F O R _ U N K N O W N _ P A R A M E T E R S >-------------------------------------------------- |
|
|
|
|
|
|
Checks that all user-used parameters are valid. |
|
function p.lang(frame) |
|
|
local parent = frame:getParent() |
|
|
local args = parent.args and parent.args or frame.args |
|
|
|
|
|
local codes = args and mw.text.trim(args) |
|
|
local text = args or error("Provide text in the second parameter") |
|
|
|
|
|
local languageCode, scriptCode, errorText = getCodes(codes, text) |
|
|
|
|
|
local italics = args.italics or args.i or args.italic |
|
|
-- italics = not (italics == "n" or italics == "-" or italics == "no") |
|
|
|
|
|
italics = italics_set (languageCode, scriptCode, italics) |
|
|
|
|
|
return tag(text, languageCode, scriptCode, italics) .. errorText |
|
|
end |
|
|
|
|
|
|
|
]] |
|
local function linkToWiktionary(entry, linkText, languageCode) |
|
|
|
local function check_for_unknown_parameters(args) |
|
local data = langData |
|
|
|
for param, _ in pairs(args) do |
|
local name |
|
|
|
if not cfg.valid_parameters then |
|
if languageCode then |
|
|
|
return make_error(string.format(error_msgs.invalid_parameters, param)) |
|
if data and data.name then |
|
|
name = data.name |
|
|
else |
|
|
-- On other languages' wikis, use mw.getContentLanguage():getCode(), |
|
|
-- or replace 'en' with that wiki's language code. |
|
|
name = mw.language.fetchLanguageName(languageCode, 'en') |
|
|
if name == "" then |
|
|
error("Name for the language code " .. ("%q"):format(languageCode or nil) |
|
|
.. " could not be retrieved with mw.language.fetchLanguageName, " |
|
|
.. "so it should be added to ]") |
|
|
end |
|
|
end |
|
|
if entry:sub(1, 1) == "*" then |
|
|
if name ~= "" then |
|
|
entry = "Reconstruction:" .. name .. "/" .. entry:sub(2) |
|
|
else |
|
|
error("Language name is empty") |
|
|
end |
|
|
elseif data and data.type == "reconstructed" then |
|
|
mw.log("Reconstructed language without asterisk:", languageCode, name, entry) |
|
|
local frame = mw.getCurrentFrame() |
|
|
-- Track reconstructed entries with no asterisk by transcluding |
|
|
-- a nonexistent template. This technique is used in Wiktionary: |
|
|
-- see ]. |
|
|
-- ] |
|
|
pcall(frame.expandTemplate, frame, |
|
|
{ title = 'tracking/wikt-lang/reconstructed with no asterisk' }) |
|
|
if name ~= "" then |
|
|
entry = "Reconstruction:" .. name .. "/" .. entry |
|
|
else |
|
|
error("Language name is empty") |
|
|
end |
|
|
elseif data and data.type == "appendix" then |
|
|
if name ~= "" then |
|
|
entry = "Appendix:" .. name .. "/" .. entry |
|
|
else |
|
|
error("Language name is empty") |
|
|
end |
|
|
end |
|
end |
|
if entry and linkText then |
|
|
return "]" |
|
|
else |
|
|
error("linkToWiktionary needs a Wiktionary entry or link text, or both") |
|
|
end |
|
|
else |
|
|
return "]" |
|
|
end |
|
end |
|
end |
|
end |
|
|
|
|
|
|
--[[--------------------------< M A I N >-------------------------------------------------------------------- |
|
function p.wiktlang(frame) |
|
|
|
|
|
local parent = frame:getParent() |
|
|
|
Entry point for {{Wikt-lang}}. |
|
local args = parent.args and parent.args or frame.args |
|
|
|
|
|
|
|
|
|
Parameters are received from the template's frame (parent frame). |
|
local codes = args and mw.text.trim(args) |
|
|
|
|
|
local word1 = ifNotEmpty(args) |
|
|
|
* |1= – language code |
|
local word2 = ifNotEmpty(args) |
|
|
|
* |2= – Wiktionary page name |
|
|
|
|
|
* |3= – display text |
|
|
* |italic= – "no" to disable |
|
|
|
|
|
]] |
|
|
function p.main(frame) |
|
|
local getArgs = require('Module:Arguments').getArgs |
|
|
local args = getArgs(frame) |
|
|
|
|
if not args then |
|
if not args then |
|
error("Parameter 2 is required") |
|
-- A Wiktionary page name is required. |
|
|
return make_error(error_msgs.no_text) |
|
end |
|
end |
|
|
|
|
local languageCode, scriptCode, errorText = getCodes(codes, word2 or word1) |
|
|
|
|
|
local italics = args.italics or args.i or args.italic |
|
|
-- italics = not (italics == "n" or italics == "-" or italics == "no") |
|
|
|
|
|
|
|
local error_msg = check_for_unknown_parameters(args) |
|
italics = italics_set (languageCode, scriptCode, italics); |
|
|
|
if error_msg then |
|
|
return error_msg |
|
|
end |
|
|
|
|
|
|
-- For the display text, use args if supplied, if not, use the Wiktionary page name (args) |
|
local entry, linkText |
|
|
|
args = args or args |
|
if word2 and word1 then |
|
|
|
|
|
entry = makeEntryName(word1, languageCode) |
|
|
|
-- To allow the errors to be associated with this template. |
|
linkText = word2 |
|
|
|
args.template = cfg.template |
|
elseif word1 then |
|
|
|
args.error_category = cfg.category |
|
entry = makeEntryName(word1, languageCode) |
|
|
|
|
|
linkText = word1 |
|
|
|
-- Handle the display text html tag. |
|
|
local lang = require("Module:Lang/sandbox") |
|
|
local result = lang._wikt_lang(args) |
|
|
|
|
|
-- An error returned, stop here. |
|
|
if type(result) == "string" and string.find(result, "Error") then |
|
|
return result |
|
end |
|
end |
|
|
|
|
|
|
|
|
--TODO: Do we need the result to return with a <span title=""> tag? |
|
local out |
|
|
|
|
|
if languageCode and entry and linkText then |
|
|
|
local page_name, error_msg = get_clean_Wiktionary_page_name(args, result.code) |
|
out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics) |
|
|
elseif entry and linkText then |
|
if error_msg then |
|
|
return error_msg |
|
out = linkToWiktionary(entry, linkText) |
|
|
else |
|
|
out = '<span style="font-size: smaller;"></span>' |
|
|
end |
|
end |
|
|
|
|
if out and errorText then |
|
|
return out .. errorText |
|
|
else |
|
|
return errorText or error("The function wiktlang generated nothing") |
|
|
end |
|
|
end |
|
|
|
|
|
|
|
local link, error_msg = make_Wiktionary_link(page_name, result.html, result.code, result.name) |
|
function p.wikt(frame) |
|
|
|
if error_msg then |
|
local parent = frame:getParent() |
|
|
|
return error_msg |
|
local args = parent.args and parent.args or frame.args |
|
|
|
|
|
local codes = args and mw.text.trim(args) |
|
|
local word1 = ifNotEmpty(args) |
|
|
local word2 = ifNotEmpty(args) |
|
|
|
|
|
if not word1 then |
|
|
error("Provide a word in parameter 2.") |
|
|
end |
|
|
|
|
|
local languageCode, scriptCode, errorText = getCodes(codes, word1) |
|
|
|
|
|
local entry, linkText |
|
|
if word2 and word1 then |
|
|
entry = makeEntryName(word1, languageCode) |
|
|
linkText = word2 |
|
|
elseif word1 then |
|
|
entry = makeEntryName(word1, languageCode) |
|
|
linkText = word1 |
|
|
end |
|
|
|
|
|
local out |
|
|
if languageCode and entry and linkText then |
|
|
out = linkToWiktionary(entry, linkText, languageCode) |
|
|
elseif entry and linkText then |
|
|
out = linkToWiktionary(entry, linkText) |
|
|
else |
|
|
out = '<span style="font-size: smaller;"></span>' |
|
|
end |
|
|
|
|
|
if out and errorText then |
|
|
return out and out .. errorText |
|
|
else |
|
|
return errorText or error("The function wikt generated nothing") |
|
|
end |
|
end |
|
|
|
|
|
return link .. result.language_categories .. result.maintenance |
|
end |
|
end |
|
|
|
|