Revision as of 11:26, 3 December 2024 editGonnym (talk | contribs)Autopatrolled, Extended confirmed users, Template editors222,837 editsNo edit summary← Previous edit | Revision as of 13:05, 5 December 2024 edit undoGonnym (talk | contribs)Autopatrolled, Extended confirmed users, Template editors222,837 editsNo edit summaryNext edit → | ||
Line 8: | Line 8: | ||
internal = { | internal = { | ||
= "Name for the language code %q could not be retrieved. Add it to ].", | = "Name for the language code %q could not be retrieved. Add it to ].", | ||
= "The function |
= "The function get_clean_Wiktionary_page_name requires a link string.", | ||
= "The function make_Wiktionary_link needs a Wiktionary link, display text and language code.", | = "The function make_Wiktionary_link needs a Wiktionary link, display text and language code.", | ||
}, | }, | ||
Line 25: | Line 25: | ||
}, | }, | ||
} | } | ||
--[[--------------------------< M A K E _ E R R O R >-------------------------------------------------- | --[[--------------------------< M A K E _ E R R O R >-------------------------------------------------- | ||
Creates an error span with the supplied error message and attaches the error category. | |||
]] | ]] | ||
local function make_error(msg) | local function make_error(msg) | ||
return string.format(cfg.error_msg, cfg.template, msg, cfg.category) | return string.format(cfg.error_msg, cfg.template, msg, cfg.category) | ||
end | end | ||
--[[--------------------------< R E P L A C E _ C H A R A C T E R S >-------------------------------------------------- | |||
Replaces specific characters as defined in Module:Wikt-lang/data in a language's "replacement" value. | |||
--[[--------------------------< M A K E _ E N T R Y _ N A M E >-------------------------------------------------- | |||
]] | ]] | ||
local function replace_characters(data, link_text) | |||
local function make_entry_name(link_text, language_code) | |||
link_text = tostring(link_text) | |||
if link_text == nil or link_text == "" then | |||
return nil, make_error(error_msgs.internal.make_entry_name) | |||
end | |||
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping. | |||
link_text = link_text:gsub("\'\'\'", "") | |||
link_text = link_text:gsub("\'\'", "") | |||
local data = langData | |||
if data == nil then | |||
-- No language specific data in module; use text as is. | |||
return link_text | |||
end | |||
local replacements = data and data | local replacements = data and data | ||
if replacements == nil then | if replacements == nil then | ||
Line 74: | Line 59: | ||
replacements.to and replacements.to or "") | replacements.to and replacements.to or "") | ||
end | end | ||
return link_text | |||
else | |||
for regex, replacement in pairs(replacements) do | |||
link_text = mw.ustring.gsub(link_text, regex, replacement) | |||
end | |||
end | end | ||
for regex, replacement in pairs(replacements) do | |||
return mw.ustring.gsub(link_text, regex, replacement) | |||
end | |||
end | |||
--[[--------------------------< R E M O V E _ B O L D _ I T A L I C >-------------------------------------------------- | |||
Removes bold and italics, so that words that contain bolding or emphasis can be linked without piping. | |||
]] | |||
local function remove_bold_italic(link_text) | |||
if not link_text then | |||
return link_text | |||
end | |||
link_text = link_text:gsub("\'\'\'", "") | |||
link_text = link_text:gsub("\'\'", "") | |||
return link_text | return link_text | ||
end | end | ||
--[[--------------------------< G E T _ C L E A N _ W I K T I O N A R Y _ P A G E _ N A M E >-------------------------------------------------- | |||
--TODO: doc | |||
local function get_prefix(language_name, link_text, data) | |||
Returns a clean Wiktionary page name by removing bold and italics, and by replacing specific characters as defined in Module:Wikt-lang/data. | |||
]] | |||
local function get_clean_Wiktionary_page_name(link_text, language_code) | |||
link_text = tostring(link_text) | |||
if link_text == nil or link_text == "" then | |||
return nil, make_error(error_msgs.internal.get_clean_Wiktionary_page_name) | |||
end | |||
link_text = remove_bold_italic(link_text) | |||
local data = langData | |||
if data == nil then | |||
-- No language specific data in module; use text as is. | |||
return link_text | |||
end | |||
return replace_characters(data, link_text) | |||
end | |||
--[[--------------------------< G E T _ N A M E S P A C E >-------------------------------------------------- | |||
Returns the link_text with a prefix of a Wiktionary namespace, if relevant. | |||
Current namespaces that can be returned: "Appendix:" and "Reconstruction:". | |||
If not one of the above namespaces, returns the unaltered link_text. | |||
]] | |||
local function get_namespace(language_name, link_text, data) | |||
if link_text:sub(1, 1) == "*" then | if link_text:sub(1, 1) == "*" then | ||
return string.format(cfg.reconstruction, language_name, link_text:sub(2)) | return string.format(cfg.reconstruction, language_name, link_text:sub(2)) | ||
Line 105: | Line 133: | ||
end | end | ||
--[[--------------------------< G E T _ L A N G U A G E _ N A M E >-------------------------------------------------- | |||
--TODO: doc. If the code reaches this point and data.name does not have a name, this is an internal error. | |||
Retrieves the language name. | |||
A language is first searched in Module:Wikt-lang/data and if found and has a language name set, returns it. | |||
That database is used to override the language names produced by Module:Lang/data. | |||
If no language is found or the language does not have a language name set, returns the language name from Module:Lang/data. | |||
]] | |||
local function get_language_name(data, language_name) | local function get_language_name(data, language_name) | ||
if data and data.name then | if data and data.name then | ||
Line 115: | Line 150: | ||
--[[--------------------------< M A K E _ W I K T I O N A R Y _ L I N K >-------------------------------------------------- | --[[--------------------------< M A K E _ W I K T I O N A R Y _ L I N K >-------------------------------------------------- | ||
Creates a Wiktionary link. | |||
A link_text and display_text are always needed. Error if they are missing. | |||
A link_text, display_text and language_code are always needed. Error if they are missing. | |||
A language name can sometimes be nil if the private code is only listed at Module:Wikt-lang/data and not on Module:Lang/data. | |||
A languge code should never be nil, but only needed if language_name is nil. | |||
]] | |||
A language name can sometimes be nil if the private code is only listed at Module:Wikt-lang/data and not on Module:Lang/data. | |||
If a language name cannot be retrieved, an error is returned. | |||
]] | |||
local function make_Wiktionary_link(link_text, display_text, language_code, language_name) | local function make_Wiktionary_link(link_text, display_text, language_code, language_name) | ||
if not link_text and not display_text and not language_code then | if not link_text and not display_text and not language_code then | ||
Line 132: | Line 169: | ||
if link_text:sub(1, 1) == "*" or (data and data.type) then | if link_text:sub(1, 1) == "*" or (data and data.type) then | ||
link_text = |
link_text = get_namespace(language_name, link_text, data) | ||
end | end | ||
return "]" | |||
end | |||
local link = "]" | |||
return string.format(link, link_text, language_name, display_text) | |||
end | |||
--[[--------------------------< W I K T L A N G >-------------------------------------------------------------------- | --[[--------------------------< W I K T L A N G >-------------------------------------------------------------------- | ||
Line 144: | Line 182: | ||
Parameters are received from the template's frame (parent frame). | Parameters are received from the template's frame (parent frame). | ||
* 1 – language code | * |1= – language code | ||
* 2 – link text | * |2= – link text | ||
* 3 – display text | * |3= – display text | ||
* italic – "no" to disable | * |italic= – "no" to disable | ||
]] | ]] | ||
function p.wiktlang(frame) | function p.wiktlang(frame) | ||
local getArgs = require('Module:Arguments').getArgs | local getArgs = require('Module:Arguments').getArgs | ||
local args = getArgs(frame) | local args = getArgs(frame) | ||
-- Handle the display text. | |||
local lang = require("Module:Lang/sandbox2") | |||
if not args then | if not args then | ||
Line 170: | Line 204: | ||
args.error_category = cfg.category | args.error_category = cfg.category | ||
-- Handle the display text html tag. | |||
local lang = require("Module:Lang/sandbox2") | |||
local result = lang._lang(args) | local result = lang._lang(args) | ||
Line 179: | Line 215: | ||
--TODO: we need the result to return without a <span title=""> tag which probably should be removed. | --TODO: we need the result to return without a <span title=""> tag which probably should be removed. | ||
local entry, error_msg = |
local entry, error_msg = get_clean_Wiktionary_page_name(args, result.code) | ||
if error_msg then | if error_msg then | ||
return error_msg | return error_msg | ||
Line 185: | Line 221: | ||
local link | local link | ||
link, error_msg = make_Wiktionary_link(entry, result.html, result.code, result.name) | |||
if result.code then | |||
link, error_msg = make_Wiktionary_link(entry, result.html, result.code, result.name) | |||
else | |||
link, error_msg = make_Wiktionary_link(entry, result.html) | |||
end | |||
if error_msg then | if error_msg then |
Revision as of 13:05, 5 December 2024
This is the module sandbox page for Module:Wikt-lang (diff). See also the companion subpage for test cases (run). |
This module is rated as beta, and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected. |
Language templates |
---|
Language names (ISO 639) |
|
Interwiki links |
Foreign-language text |
|
Other |
|
This module depends on the following other modules: |
This module is used by {{Wikt-lang}}. It is inspired by the templates {{m}} and {{l}} and their associated modules on Wiktionary. It has a Wiktionary link function that links to the correct section of the Wiktionary entry, and applies correct language formatting and italics. The language-tagging function does most of what {{Lang}} does, except that italics can't be customized and categories aren't added.
The module uses Module:Wikt-lang/data to retrieve the language name for a language code, and to perform the necessary entry-name replacements (for instance, removing macrons from Latin entry names). These are unfortunately not automatically retrieved from Wiktionary's Wikt-lang data modules. For language codes that do not have a `name` value in this module, the language name is fetched with `mw.language.fetchLanguage`. When `mw.language.fetchLanguage` does not fetch the correct language name (or any language name at all), please add it to Module:Wikt-lang/data, and similarly when the correct entry name is not generated, please add the entry name replacements to the module.
Examples
{{#invoke:Wikt-lang|wiktlang|en|language}} -> language
{{#invoke:Wikt-lang|wiktlang|en|language|languages}} -> languages
{{#invoke:Wikt-lang|wiktlang|fr|bourguignon}} -> bourguignon
{{#invoke:Wikt-lang|wiktlang|la|homō}} -> homō
{{#invoke:Wikt-lang|wiktlang|grc|δημοκρατίᾱ}} -> δημοκρατίᾱ
{{#invoke:Wikt-lang|wiktlang|ru|язы́к}} -> язы́к
{{#invoke:Wikt-lang|wiktlang|ar|العَرَبِيَّة}} -> العَرَبِيَّة
{{#invoke:Wikt-lang|wiktlang|fa|فارسی}} -> فارسی
{{#invoke:Wikt-lang|wiktlang|ine-x-proto|*h₂enǵʰ-}} -> *h₂enǵʰ-
Invalid codes
{{#invoke:Wikt-lang|wiktlang|EN|language}} -> language
{{#invoke:Wikt-lang|wiktlang|En|language|languages}} -> languages
{{#invoke:Wikt-lang|wiktlang|La|homō}} -> homō
{{#invoke:Wikt-lang|wiktlang|Grc|δημοκρατίᾱ}} -> δημοκρατίᾱ
{{#invoke:Wikt-lang|wiktlang|Ru|язы́к}} -> язы́к
{{#invoke:Wikt-lang|wiktlang|Ar|العَرَبِيَّة}} -> العَرَبِيَّة
{{#invoke:Wikt-lang|wiktlang|Fa|فارسی}} -> فارسی
Errors
{{#invoke:Wikt-lang|wiktlang|en-Latin|language}}
-> language
Comparison of codes
Language code | Wiktionary name | English Wikipedia name |
---|---|---|
aaq | Penobscot | Eastern Abnaki |
abe | Abenaki | Western Abnaki |
ajp | South Levantine Arabic | South Levantine Arabic |
apc | North Levantine Arabic | Levantine Arabic |
arb | Modern Standard Arabic | Standard Arabic |
cel-x-bryproto | Proto-Brythonic | Error: unrecognized private tag: bryproto |
cu | Old Church Slavonic | Church Slavonic |
egy | Egyptian | Ancient Egyptian |
frp | Franco-Provençal | Arpitan |
gmw-x-proto | Proto-West Germanic | Error: unrecognized private tag: proto |
grk-x-proto | Proto-Hellenic | Proto-Greek |
ine-x-bsproto | Proto-Balto-Slavic | Error: unrecognized private tag: bsproto |
moe | Cree | Innu |
mul | Translingual | multiple |
nds-de | German Low German | Low German |
non-x-proto | Proto-Norse | Error: unrecognized private tag: proto |
poz-x-polproto | Proto-Nuclear Polynesian | Error: unrecognized private tag: polproto |
rw | Rwanda-Rundi | Kinyarwanda |
tts | Isan | Northeastern Thai |
xlu | Luwian | Cuneiform Luwian |
zle-x-ort | Old Ruthenian | Error: unrecognized private tag: ort |
Tracking categories
The above documentation is transcluded from Module:Wikt-lang/doc. (edit | history)Editors can experiment in this module's sandbox (edit | diff) and testcases (edit | run) pages.
Add categories to the /doc subpage. Subpages of this module.
require('strict')

local m_data = mw.loadData("Module:Wikt-lang/data/sandbox")
local langData = m_data.languages or m_data

local p = {}

-- NOTE(review): the table keys and the wikilink target below were stripped from the
-- extracted text (everything inside square brackets was lost). They are reconstructed
-- from how the messages are referenced further down; confirm against the live module.
local error_msgs = {
	internal = {
		["language_code_missing"] = "Name for the language code %q could not be retrieved. Add it to [[Module:Wikt-lang/data]].",
		["get_clean_Wiktionary_page_name"] = "The function get_clean_Wiktionary_page_name requires a link string.",
		["make_Wiktionary_link"] = "The function make_Wiktionary_link needs a Wiktionary link, display text and language code.",
	},
	["no_text"] = "A Wiktionary entry is required.",
}

local cfg = {
	template = "Wikt-lang",
	-- Three placeholders: template name, message, error category (see make_error).
	-- NOTE(review): the trailing category link was stripped in extraction; reconstructed
	-- from the third argument passed to string.format in make_error.
	error_msg = '<span style="color: #d33;">Error: {{%s}}: %s</span>[[Category:%s]]',
	category = "Wikt-lang template errors",
	appendix = "Appendix:%s/%s",
	reconstruction = "Reconstruction:%s/%s",
	types = {
		appendix = "appendix",
		reconstructed = "reconstructed",
	},
}


--[[--------------------------< M A K E _ E R R O R >--------------------------------------------------

Creates an error span with the supplied error message and attaches the error category.

]]

local function make_error(msg)
	return string.format(cfg.error_msg, cfg.template, msg, cfg.category)
end


--[[--------------------------< R E P L A C E _ C H A R A C T E R S >--------------------------------------------------

Replaces specific characters as defined in Module:Wikt-lang/data in a language's "replacements" value.

Always returns exactly one value (the resulting text). Callers treat a second return
value as an error message, so the match count produced by gsub must never leak out.

]]

local function replace_characters(data, link_text)
	-- NOTE(review): the subscript on data was lost in extraction; "replacements" is
	-- reconstructed from the local variable name. Confirm against the data module.
	local replacements = data and data["replacements"]
	if replacements == nil then
		-- No replacements needed; use text as is.
		return link_text
	end

	-- Decompose so that the diacritics of characters such
	-- as á can be removed in one go.
	-- No need to compose at the end, because the MediaWiki software
	-- will handle that.
	if replacements.decompose then
		link_text = mw.ustring.toNFD(link_text)
		for i, from in ipairs(replacements.from) do
			-- Assigning to a single variable discards gsub's match count.
			-- NOTE(review): the [i] subscript on replacements.to is reconstructed.
			link_text = mw.ustring.gsub(
				link_text,
				from,
				replacements.to and replacements.to[i] or "")
		end
		return link_text
	end

	-- Apply every pattern/replacement pair. The previous code returned from inside the
	-- loop, so only one (arbitrary) rule was applied and gsub's count was returned as a
	-- second value. pairs() order is unspecified, so rules must not depend on each other.
	for regex, replacement in pairs(replacements) do
		link_text = mw.ustring.gsub(link_text, regex, replacement)
	end
	return link_text
end


--[[--------------------------< R E M O V E _ B O L D _ I T A L I C >--------------------------------------------------

Removes bold and italics, so that words that contain bolding or emphasis can be linked without piping.

]]

local function remove_bold_italic(link_text)
	if not link_text then
		return link_text
	end
	-- Strip the three-apostrophe (bold) markup first, then the two-apostrophe (italic) markup.
	link_text = link_text:gsub("\'\'\'", "")
	link_text = link_text:gsub("\'\'", "")
	return link_text
end


--[[--------------------------< G E T _ C L E A N _ W I K T I O N A R Y _ P A G E _ N A M E >--------------------------------------------------

Returns a clean Wiktionary page name by removing bold and italics, and by replacing specific characters
as defined in Module:Wikt-lang/data.

Returns the cleaned text on success, or nil plus an error message when link_text is missing or empty.

]]

local function get_clean_Wiktionary_page_name(link_text, language_code)
	-- Validate before tostring(): tostring(nil) yields the string "nil", which would
	-- otherwise slip past the check and be linked as a page name.
	if link_text == nil then
		return nil, make_error(error_msgs.internal.get_clean_Wiktionary_page_name)
	end
	link_text = tostring(link_text)
	if link_text == "" then
		return nil, make_error(error_msgs.internal.get_clean_Wiktionary_page_name)
	end

	link_text = remove_bold_italic(link_text)

	-- NOTE(review): the [language_code] subscript was lost in extraction; reconstructed
	-- from the otherwise unused language_code parameter.
	local data = langData[language_code]
	if data == nil then
		-- No language specific data in module; use text as is.
		return link_text
	end

	-- Bind to a local so that only a single value is ever returned on success.
	local cleaned = replace_characters(data, link_text)
	return cleaned
end


--[[--------------------------< G E T _ N A M E S P A C E >--------------------------------------------------

Returns the link_text with a prefix of a Wiktionary namespace, if relevant.
Current namespaces that can be returned: "Appendix:" and "Reconstruction:".
If not one of the above namespaces, returns the unaltered link_text.

]]

local function get_namespace(language_name, link_text, data)
	if link_text:sub(1, 1) == "*" then
		-- A leading asterisk marks a reconstructed term; the asterisk is not part of the page name.
		return string.format(cfg.reconstruction, language_name, link_text:sub(2))
	end

	if data and data.type then
		if data.type == cfg.types.reconstructed then
			-- TODO: if this is an error, replace with category
			--return nil, make_error("Reconstructed language without asterisk")
			local frame = mw.getCurrentFrame()
			mw.log("Reconstructed language without asterisk:", language_name, link_text)
			-- Best-effort tracking; pcall keeps a failure to expand the tracking template from breaking the output.
			pcall(frame.expandTemplate, frame, {title = 'tracking/wikt-lang/reconstructed with no asterisk'})
			return string.format(cfg.reconstruction, language_name, link_text)
		elseif data.type == cfg.types.appendix then
			-- Was "data_type", an undefined global that raises an error under require('strict').
			return string.format(cfg.appendix, language_name, link_text)
		end
	end

	-- If for any reason this reaches here, return the unaltered link_text.
	return link_text
end


--[[--------------------------< G E T _ L A N G U A G E _ N A M E >--------------------------------------------------

Retrieves the language name.

A language is first searched in Module:Wikt-lang/data and if found and has a language name set, returns it.
That database is used to override the language names produced by Module:Lang/data.

If no language is found or the language does not have a language name set, returns the language name
that was passed in (which may be nil).

]]

local function get_language_name(data, language_name)
	if data and data.name then
		return data.name
	end
	return language_name
end


--[[--------------------------< M A K E _ W I K T I O N A R Y _ L I N K >--------------------------------------------------

Creates a Wiktionary link.

A link_text, display_text and language_code are always needed. Error if any of them is missing.

A language name can sometimes be nil if the private code is only listed at Module:Wikt-lang/data and not on Module:Lang/data.
If a language name cannot be retrieved, an error is returned.

Returns the link on success, or nil plus an error message on failure.

]]

local function make_Wiktionary_link(link_text, display_text, language_code, language_name)
	-- All three are required, so any single missing value is an error ("or", not "and").
	if not link_text or not display_text or not language_code then
		return nil, make_error(error_msgs.internal.make_Wiktionary_link)
	end

	-- NOTE(review): the [language_code] subscript was lost in extraction; reconstructed.
	local data = langData[language_code]
	language_name = get_language_name(data, language_name)
	if not language_name then
		-- Return the error in the second slot, where the caller looks for it, and fill
		-- in the %q placeholder of the message with the offending code.
		return nil, make_error(string.format(error_msgs.internal.language_code_missing, language_code))
	end

	if link_text:sub(1, 1) == "*" or (data and data.type) then
		link_text = get_namespace(language_name, link_text, data)
	end

	-- NOTE(review): the link pattern was stripped in extraction; reconstructed from the
	-- three arguments below as page name, section (language name) and display text.
	local link = "[[wikt:%s#%s|%s]]"
	return string.format(link, link_text, language_name, display_text)
end


--[[--------------------------< W I K T L A N G >--------------------------------------------------------------------

Entry point for {{Wikt-lang}}.

Parameters are received from the template's frame (parent frame).

* |1= – language code
* |2= – link text
* |3= – display text
* |italic= – "no" to disable

]]

function p.wiktlang(frame)
	local getArgs = require('Module:Arguments').getArgs
	local args = getArgs(frame)

	-- NOTE(review): the positional subscripts in this function were lost in extraction
	-- and are reconstructed from the parameter list documented above.
	if not args[2] then
		-- A Wiktionary entry is required.
		return make_error(error_msgs.no_text)
	end

	-- Remember the link target before the display text is substituted below.
	local link_text = args[2]

	-- The display text should be the text wrapped in the language tag.
	-- NOTE(review): assumes Module:Lang's _lang() takes the text to wrap from args[2]; confirm.
	args[2] = args[3] or args[2]

	-- To allow the errors to be associated with this template.
	args.template = cfg.template
	args.error_category = cfg.category

	-- Handle the display text html tag.
	local lang = require("Module:Lang/sandbox2")
	local result = lang._lang(args)

	-- An error returned, stop here.
	if type(result) == "string" and string.find(result, "Error") then
		return result
	end

	--TODO: we need the result to return without a <span title=""> tag which probably should be removed.

	local entry, error_msg = get_clean_Wiktionary_page_name(link_text, result.code)
	if error_msg then
		return error_msg
	end

	local link
	link, error_msg = make_Wiktionary_link(entry, result.html, result.code, result.name)
	if error_msg then
		return error_msg
	end

	-- Tolerate absent category/maintenance strings instead of failing on nil concatenation.
	return link .. (result.language_categories or "") .. (result.maintenance or "")
end

return p