Mòideal:Language/data/ISO 639-2/make
Coltas
(deasbaireachd | eachdraidh | ceanglaichean | doc | bogsa-gainmhich | cùisean deuchainn)
This module is currently protected from editing. See the protection policy and protection log for more details. Please discuss any changes on the talk page; you may submit an edit request to ask an administrator to make an edit if it is uncontroversial or supported by consensus. You may also request that this page be unprotected. |
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
Reads a local copy of data from the table at Codes for the Representation of Names of Languages and extracts the ISO 639-2 codes, names, and ISO 639-1 synonyms. There are two functions in the tool: ISO_synonym_extract() and ISO_2_name_extract().
Ùsaid - Usage
To use this tool:
- open a blank sandbox page and paste either or both of these {{#invoke:}}s into it at the top:
  - for use in Mòideal:Lang/ISO 639 synonyms:
    {{#invoke:Language/data/ISO 639-2/make|ISO_synonym_extract|file-date=YYYY-MM-DD}}
  - for use in Mòideal:Language/data/ISO 639-2:
    {{#invoke:Language/data/ISO 639-2/make|ISO_2_name_extract|file-date=YYYY-MM-DD}}
- go to the current Codes for the Representation of Names of Languages. Copy the content of the table on that page and paste it into the sandbox page below the {{#invoke:}}.
- click Show preview
- wait
- get result
require('Module:No globals');
--[=[------------------------< I S O _ S Y N O N Y M _ E X T R A C T >-----------------------------------------
{{#invoke:Language/data/ISO 639-2/make|ISO_synonym_extract|file-date=2013-01-11}}
reads a local copy of data from the table at http://www.loc.gov/standards/iso639-2/php/English_list.php, extracts
the ISO 639-2 (or 639-2T) codes that have equivalent ISO 639-1 codes and creates a table to translate 639-2 to 639-1.
ISO-639-3 uses 639-2T codes
useful lines in the source table have the form:
<English name>\t<all English names>\t<all French names>\t<639-2 code>\t<639-1 code>\n
where:
<English name> is primary English name (not used here); one of <all English names> so duplicates code listing
<all English names> is all of the English names (not used here)
<all French names> is all of the French names (not used here)
<639-2 code> is the three-character ISO 639-2 or 639-2B/639-2T language code; when 639-2T present, use that code
<639-1 code> is the two-character ISO 639-1 language code synonym of the -2 code (if one is defined)
like this (with synonym):
Abkhazian Abkhazian abkhaze abk ab
or (without synonym):
Achinese Achinese aceh ace
for the file date use the date listed at the bottom of the source page, written in YYYY-MM-DD format as in the example invocation above
]=]
-- Builds the text of a Lua table that maps ISO 639-2 (or 639-2T) codes to their ISO 639-1 synonyms.
-- frame: Scribunto frame object; frame.args["file-date"] supplies the date written into the header line
-- returns: a string of <pre>-wrapped Lua source listing the sorted ["639-2"] = "639-1" pairs
local function ISO_synonym_extract (frame)
	local page = mw.title.getCurrentTitle();									-- get a page object for this page
	local content = page:getContent() or '';									-- unparsed content; getContent() is nil when the page has no content
	local skip_table = {};														-- 639-2/639-2T codes already handled; used to prevent duplication
	local out_table = {};														-- output table
	local date_arg = frame.args["file-date"];									-- |file-date= (from the bottom of the source page)
	if not date_arg or '' == date_arg then
		date_arg = 'unknown';													-- do not crash on a missing parameter; make the omission visible in the output
	end
	local file_date = 'File-Date: ' .. date_arg;

	for _, line in ipairs (mw.text.split (content, '[\r\n]')) do				-- for each line of the source text
		local split_table = mw.text.split (line, '\t');							-- split at the tabs
		local synonym = split_table[5] and mw.text.trim (split_table[5]) or '';	-- 639-1 cell; blank cells may be '', ' ', or carry stray whitespace

		if '' ~= synonym then													-- if there is a 639-1 code
			local raw_code = mw.text.trim (split_table[4]);
			local code = raw_code:match ('%a+%s*/%s*(%a+)') or raw_code;			-- when 639-2B/639-2T use 639-2T (spaces around '/' tolerated) else use 639-2

			if not skip_table[code] then										-- source lists a code once per language name; emit it only once
				skip_table[code] = true;										-- remember that we've handled this 639-2/639-2T code
				table.insert (out_table, "[\"" .. code .. "\"] = \"" .. synonym .. "\"");	-- make new table entry
			end
		end
	end

	table.sort (out_table);

	return "<br /><pre>-- " .. file_date .. "<br />return {<br />\t" .. table.concat (out_table, ',<br />\t') .. "<br />\t}<br />" .. "</pre>";
end
--[[--------------------------< I S O _ 2 _ N A M E _ E X T R A C T >------------------------------------------
{{#invoke:Language/data/ISO 639-2/make|ISO_2_name_extract|file-date=2013-01-11}}
reads a local copy of data from the table at http://www.loc.gov/standards/iso639-2/php/English_list.php, extracts
the ISO 639-2 and 639-2T codes and their associated language names
useful lines in the source table have the form:
<English name>\t<all English names>\t<all French names>\t<639-2 code>\t<639-1 code>\n
where:
<English name> is primary English name (not used here); one of <all English names> so duplicates code listing
<all English names> is all of the English names (used here)
<all French names> is all of the French names (not used here)
<639-2 code> is the three-character ISO 639-2 or 639-2B/639-2T language code; both are used
<639-1 code> is the two-character ISO 639-1 language code synonym of the -2 code (not used here)
for the file date use the date listed at the bottom of the source page, written in YYYY-MM-DD format as in the example invocation above
]]
-- Builds the text of a Lua table that maps every ISO 639-2 code (both -2B and -2T forms) to its English names.
-- frame: Scribunto frame object; frame.args["file-date"] supplies the date written into the header line
-- returns: a string of <pre>-wrapped Lua source listing the sorted ["code"] = {"name", ...} entries
local function ISO_2_name_extract (frame)
	local page = mw.title.getCurrentTitle();									-- get a page object for this page
	local content = page:getContent() or '';									-- unparsed content; getContent() is nil when the page has no content
	local skip_table = {['qaa-qtz']=true};										-- 639-2/639-2T codes already handled; qaa-qtz reserved for local use so not supported here
	local out_table = {};														-- output table
	local date_arg = frame.args["file-date"];									-- |file-date= (from the bottom of the source page)
	if not date_arg or '' == date_arg then
		date_arg = 'unknown';													-- do not crash on a missing parameter; make the omission visible in the output
	end
	local file_date = 'File-Date: ' .. date_arg;

	for _, line in ipairs (mw.text.split (content, '[\r\n]')) do				-- for each line of the source text
		local split_table = mw.text.split (line, '\t');							-- split at the tabs

		if split_table[4] then													-- only lines with a code column are data lines
			local name_table = {};												-- quoted language names for this line
			for _, name in ipairs (mw.text.split (split_table[2], ' *; *')) do	-- split 'all English names' at the '; '
				name = mw.text.trim (name);
				if '' ~= name then												-- a trailing ';' yields an empty fragment; emitting it would produce invalid Lua
					table.insert (name_table, '"' .. name .. '"');				-- add double quotes around each name
				end
			end
			local names = table.concat (name_table, ', ');

			local codes = mw.text.split (mw.text.trim (split_table[4]), ' */ *');	-- split at the '/' into -2B and -2T individual codes
			for _, code in ipairs (codes) do									-- build a table entry for each code
				if '' ~= code and not skip_table[code] then						-- ignore blank code cells; source data has duplicates so emit each code once
					table.insert (out_table, '["' .. code .. '"] = {' .. names .. '}');
					skip_table[code] = true;									-- remember that we've done this code
				end
			end
		end
	end

	table.sort (out_table);

	return "<br /><pre>-- " .. file_date .. "<br />return {<br />\t" .. table.concat (out_table, ',<br />\t') .. "<br />\t}<br />" .. "</pre>";
end
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
]]
-- Public interface of the module: the two extraction tools reachable through {{#invoke:}}.
local exported = {};
exported.ISO_synonym_extract = ISO_synonym_extract;
exported.ISO_2_name_extract = ISO_2_name_extract;
return exported;