-- Modul:ISO15924
--[=[
ISO 15924 support for scripting systems
- fetch()
- getLanguageScript()
- getScripts()
- isCJK()
- isRTL()
- isScript()
- isTrans()
- scriptName()
- showScripts()
- testScripts()
- failsafe()
]=]

-- Library table; the same table also serves as versioning record
-- (Failsafe) and as cache holder for foreign modules (GlobalMod).
local ISO15924 = {
    suite   = "ISO15924",      -- base module name handed to foreignModule()
    serial  = "2020-01-31",    -- version compared by failsafe()
    item    = 71584769,        -- number of the Wikidata item
    statics = "codes"          -- subpage used for the names in Commons
}
local Failsafe  = ISO15924
local GlobalMod = ISO15924
local Unicode   = { }

ISO15924.Text    = { }
ISO15924.Unicode = Unicode

do
    -- Build a lookup set: every listed codepoint maps to true
    local function flags( ... )
        local set = { }
        for _, codepoint in ipairs( { ... } ) do
            set[ codepoint ] = true
        end
        return set
    end

    -- Codepoints used by Unicode.romanNumbers()
    Unicode.RomanN = {
        -- permitted before a number: space, U+00A0, U+202F, "(", "-", "["
        bef = flags( 32, 160, 8239, 40, 45, 91 ),
        -- digits: I, V, X, L, C, D, M
        dig = flags( 73, 86, 88, 76, 67, 68, 77 ),
        -- permitted after a number: space, U+00A0, U+202F, ")", ",", ".", "]"
        fol = flags( 32, 160, 8239, 41, 44, 46, 93 )
    }
end

-- Table names which fetch() reads from the subpage named by statics
ISO15924.Commons = {
    "cjk",
    "iso639script",
    "reverse",
    "rtl",
    "trans",
    "unicodes"
}
local function foreignModule( access, advanced, append, alt, alert )
    -- Fetch global module
    -- Precondition:
    --     access    -- string, with name of base module
    --     advanced  -- true, for require(); else mw.loadData()
    --     append    -- string, with subpage part, if any; or false
    --     alt       -- number, of wikidata item of root; or false
    --     alert     -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns whatever, probably table;
    --     if loading failed and alert is not set, the second
    --     result of the last pcall() is returned as it is
    local loader = advanced and require or mw.loadData
    local storage = access
    local lucky, r, known

    -- Attach the subpage part, if any, to a page name
    local function withSubpage( base )
        if append then
            return string.format( "%s/%s", base, append )
        end
        return base
    end

    GlobalMod.globalModules = GlobalMod.globalModules or { }
    known = GlobalMod.globalModules[ access ]
    if not known then
        -- first attempt: module of that name on this site
        storage = withSubpage( storage )
        lucky, r = pcall( loader, "Module:" .. storage )
    end
    if lucky then
        return r
    end
    if not known and type( alt ) == "number" and alt > 0 then
        -- resolve the page name via the item; remember the outcome,
        -- true meaning that no sitelink has been found
        known = mw.wikibase.getSitelink( string.format( "Q%d", alt ) )
        GlobalMod.globalModules[ access ] = known or true
    end
    if type( known ) == "string" then
        storage = withSubpage( known )
        lucky, r = pcall( loader, storage )
    end
    if not lucky and alert then
        error( "Missing or invalid page: " .. storage, 0 )
    end
    return r
end -- foreignModule()
local function fill( accumulate, assign, append )
    -- Copy external sequence into local collection
    -- Precondition:
    --     accumulate  -- table, collection receiving the copies
    --     assign      -- table, with assigned definitions;
    --                    anything else is ignored
    --     append      -- table, if code names to be appended to entries
    -- Postcondition:
    --     accumulate has been extended by one fresh table per table
    --     found in assign; elements of assign which are no tables
    --     are skipped
    if type( assign ) ~= "table" then
        return
    end
    for _, entry in pairs( assign ) do
        if type( entry ) == "table" then
            local copy = { }
            for _, item in pairs( entry ) do
                copy[ #copy + 1 ] = item
            end -- for _, item
            if append then
                for i = 1, #append do
                    copy[ #copy + 1 ] = append[ i ]
                end -- for i
            end
            accumulate[ #accumulate + 1 ] = copy
        end
    end -- for _, entry
end -- fill()
local function fulfil( ask, attribute )
    -- Check whether script has a certain attribute
    -- Precondition:
    --     ask        -- string, with language or script code
    --     attribute  -- string, with "cjk" or "rtl"
    -- Postcondition:
    --     Returns true, if matching;
    --             false otherwise, also if ask is not a string
    local r
    if type( ask ) == "string" then
        local got = ISO15924.fetch( attribute )
        if type( got ) == "table" then
            local script
            if #ask == 4 then
                -- four characters: regarded as script code
                script = ask
            else
                -- language-Script; the script subtag decides
                script = ask:match( "^%a%a%a?%-(%a%a%a%a)$" )
                if not script then
                    -- Any other shape: getLanguageScript() reduces the
                    -- tag to its language part on its own. Handing over
                    -- the complete tag avoids passing nil for tags like
                    -- "zh-Hant-TW", which ended up as "Latn".
                    script = ISO15924.getLanguageScript( ask )
                end
            end
            if script then
                script = script:sub( 1, 1 ):upper()
                         .. script:sub( 2 ):lower()
                r = got[ script ]
            end
        end
    end
    return r or false
end -- fulfil()
ISO15924.Text.scriptName = function ( assigned, alien, add )
    -- Retrieve script name, hopefully linked
    -- Precondition:
    --     assigned  -- string, with script code
    --     alien     -- string, with language code, or not
    --     add       -- arbitrary additional information;
    --                  2 selects the alternative form for "de"
    -- Postcondition:
    --     Returns string;
    --         empty, if assigned is not a code like "Latn"
    --         the code itself, if no name is available
    --         the name, as wikilink if a target page is known
    local r, trsl
    if type( assigned ) == "string" and
       assigned:match( "^%u%l%l%l$" ) then
        trsl = ISO15924.fetch( "translate" )
        r = assigned
    else
        r = ""
    end
    if type( trsl ) == "table" then
        local slang
        if type( alien ) == "string" and
           alien:match( "^%l%l%l?%-?" ) then
            slang = alien:lower()
        end
        if not slang then
            -- language suffix of the current page title, like /de
            if not ISO15924.Text.sublang then
                local title = mw.title.getCurrentTitle()
                -- Capture the code only: without the capture the
                -- slash was part of the result and no translation
                -- key could ever match.
                -- true marks "evaluated, nothing found".
                ISO15924.Text.sublang = title.text:match( "/(%l%l%l?)$" )
                                        or true
            end
            if type( ISO15924.Text.sublang ) == "string" and
               type( trsl[ ISO15924.Text.sublang ] ) == "table" then
                slang = ISO15924.Text.sublang
            end
        end
        if not slang then
            -- content language of the site
            if not ISO15924.Text.sitelang then
                local contLang = mw.language.getContentLanguage()
                ISO15924.Text.sitelang = contLang:getCode():lower()
            end
            slang = ISO15924.Text.sitelang
        end
        if type( trsl[ slang ] ) == "table" then
            trsl = trsl[ slang ]
        elseif type( trsl.en ) == "table" then
            trsl = trsl.en
            slang = "en"
        else
            trsl = false
        end
        if trsl then
            local pages = ISO15924.fetch( "pages" )
            trsl = trsl[ assigned ]
            if type( trsl ) == "string" then
                r = trsl
            elseif type( trsl ) == "table" then
                if type( trsl[ 1 ] ) == "string" then
                    r = trsl[ 1 ]
                    if add and slang == "de" then
                        if tonumber( add ) == 2 and
                           type( trsl[ 2 ] ) == "string" then
                            r = trsl[ 2 ]
                        end
                    end
                end
            end
            if type( pages ) == "table" then
                local p
                for k, v in pairs( pages ) do
                    if type( v ) == "table" and v.lang == slang then
                        p = v
                        break -- for k, v
                    end
                end -- for k, v
                if p and type( p.targets ) == "table" then
                    p = p.targets[ assigned ]
                    if type( p ) == "string" then
                        -- different server issues
                        -- Pipe needed only if target and label differ
                        -- by more than the case of the first letter
                        if mw.ustring.upper( mw.ustring.sub( p, 1, 1 ) )
                           ~= mw.ustring.upper( mw.ustring.sub( r, 1, 1 ) )
                           or mw.ustring.sub( p, 2 )
                              ~= mw.ustring.sub( r, 2 ) then
                            r = string.format( "%s|%s", p, r )
                        end
                        -- Wrap as wikilink; a bare "%s" left the
                        -- piped text "target|label" unlinked.
                        r = string.format( "[[%s]]", r )
                    end
                end
            end
            if add and slang == "de" then
                if tonumber( add ) == 2 then
                    -- leading word: third element, if any; else "in"
                    local s = "in "
                    if type( trsl ) == "table" and
                       type( trsl[ 3 ] ) == "string" then
                        s = trsl[ 3 ] .. " "
                    end
                    r = s .. r
                end
            end
        end
    end
    return r
end -- ISO15924.Text.scriptName()
function Unicode.flat( analyse )
    -- Remove markup and syntax from wikitext
    -- Precondition:
    --     analyse  -- string, with wikitext
    -- Postcondition:
    --     Returns string, with cleaned content plain text
    local r = analyse

    -- Plain substring test on the current state of r
    local function has( needle )
        return r:find( needle, 1, true )
    end

    if has( "&" ) then
        r = mw.text.decode( r, true )
    end
    r = mw.text.trim( mw.text.unstrip( r ) )
    if has( "<" ) and has( ">" ) then
        -- drop opening and closing tags; only the first result
        -- of gsub() is kept
        r = r:gsub( "(</?%l[^>]*>)", "" )
    end
    if has( "[" ) then
        local linked = ( has( "[[" ) and has( "]]" ) )
                       or has( "[http" )
                       or has( "[//" )
        if linked then
            -- link syntax is resolved by Module:WLink, if available
            local lucky, WLink = pcall( require, "Module:WLink" )
            if type( WLink ) == "table" then
                r = WLink.WLink().getPlain( r )
            end
        end
    end
    return r
end -- Unicode.flat()
function Unicode.getRanges()
    -- Retrieve collection of Unicode ranges
    -- Postcondition:
    --     Returns table, with all relations codepoint / scripts;
    --     built from the "reverse" table on first call and kept
    --     in Unicode.ranges afterwards
    if type( Unicode.ranges ) == "table" then
        return Unicode.ranges
    end
    local collected = { }
    Unicode.ranges = collected
    for _, range in pairs( ISO15924.fetch( "reverse" ) ) do
        -- every range is copied into a table of its own
        local entry = { }
        for _, v in pairs( range ) do
            entry[ #entry + 1 ] = v
        end -- for _, v
        collected[ #collected + 1 ] = entry
    end -- for _, range
    return collected
end -- Unicode.getRanges()
Unicode.getScripts = function ( allow, analyse, assume )
    -- Check all chars for expected script code ranges
    -- Precondition:
    --     allow    -- table, with permitted unspecific ranges
    --     analyse  -- string or number or table, with text;
    --                 any other type is regarded as empty text
    --     assume   -- string, or nil, with ID of expected script
    -- Postcondition:
    --     Returns table, with all relations codepoint / scripts;
    --     one element { codepoint, detail } per codepoint,
    --     where detail is
    --         string -- script code, or "???" if the matching
    --                   range does not provide a table of codes
    --         true   -- not in a script range, but within allow
    --         false  -- not found at all
    local uc = Unicode.getRanges()
    local cp = type( analyse )
    local r = { }
    local e, n, p, s, v
    if cp == "string" then
        e = Unicode.flat( analyse )
        cp = { }
        n = mw.ustring.len( e )
        for i = 1, n do
            table.insert( cp, mw.ustring.codepoint( e, i, i ) )
        end -- for i
    elseif cp == "table" then
        cp = analyse
    elseif cp == "number" then
        cp = { analyse }
    else
        -- Same fallback as in Unicode.isScript(); otherwise cp
        -- remained the type name and the loop below compared
        -- nil with numbers.
        cp = { }
    end
    for i = 1, #cp do
        n = cp[ i ]
        p = { n, false }
        for k = 1, #uc do
            e = uc[ k ]
            if n <= e[ 2 ] then
                if n >= e[ 1 ] then
                    v = e[ 3 ]
                    if type( v ) == "table" then
                        -- first code by default; the expected one
                        -- is preferred if listed further down
                        s = v[ 1 ]
                        if assume then
                            for j = 2, #v do
                                if v[ j ] == assume then
                                    s = v[ j ]
                                    break -- for j
                                end
                            end -- for j
                        end
                    else
                        s = "???"
                    end
                    p[ 2 ] = s
                    n = false
                end
                break -- for k
            elseif n < e[ 1 ] then
                break -- for k
            end
        end -- for k
        if n then
            -- no script range; try the unspecific ranges
            for j = 1, #allow do
                e = allow[ j ]
                if n <= e[ 2 ] then
                    if n >= e[ 1 ] then
                        p[ 2 ] = true
                    end
                    break -- for j
                elseif n < e[ 1 ] then
                    break -- for j
                end
            end -- for j
        end
        table.insert( r, p )
    end -- for i
    return r
end -- Unicode.getScripts()
function Unicode.isScript( all, ask, analyse )
    -- Check all chars for expected script code ranges
    -- Precondition:
    --     all      -- table, with all definitions
    --     ask      -- string, with supposed script code
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns
    --         1. true, if all chars within
    --         2. table, with analyse text

    -- Returns false if the codepoint is within one of the first
    -- count ranges, else the codepoint itself; scanning stops at
    -- the first range which starts above the codepoint
    local function outside( ranges, count, codepoint )
        for i = 1, count do
            local range = ranges[ i ]
            if codepoint >= range[ 1 ] then
                if codepoint <= range[ 2 ] then
                    return false
                end
            else
                break -- for i
            end
        end -- for i
        return codepoint
    end

    local kind = type( analyse )
    local cp
    if kind == "string" then
        local text = Unicode.flat( analyse )
        cp = { }
        for i = 1, mw.ustring.len( text ) do
            cp[ #cp + 1 ] = mw.ustring.codepoint( text, i, i )
        end -- for i
    elseif kind == "table" then
        cp = analyse
    elseif kind == "number" then
        cp = { analyse }
    else
        cp = { }
    end

    local specific = { }
    local common = { }
    Unicode.merge( specific, all, ask )
    Unicode.merge( common, all, "*" )
    local nSpecific = #specific
    local nCommon = #common

    for j = 1, #cp do
        local m = outside( specific, nSpecific, cp[ j ] )
        if m and outside( common, nCommon, m ) then
            -- neither in the ranges of ask nor in those of "*"
            return false, cp
        end
    end -- for j
    return true, cp
end -- Unicode.isScript()
function Unicode.merge( accumulate, all, ask, append )
    -- Ensure single list of items
    -- Precondition:
    --     accumulate  -- table, with collection to be extended
    --     all         -- table, with all definitions
    --     ask         -- string, with requested script code
    --     append      -- true, if code names to be appended to entries
    -- Postcondition:
    --     The accumulate table may have been extended
    local g = all[ ask ]
    if type( g ) ~= "table" then
        return
    end
    -- the type of an arbitrary element tells the kind of definition
    local kind
    for _, v in pairs( g ) do
        kind = type( v )
        break -- for _, v
    end -- for _, v
    if kind == "string" then
        -- list of other codes; collect and sort their entries
        for _, other in pairs( g ) do
            local codes
            if append then
                codes = { ask, other }
                table.sort( codes )
            end
            fill( accumulate, all[ other ], codes )
        end -- for _, other
        Unicode.sort( accumulate )
    elseif kind == "table" then
        -- entries are provided directly
        fill( accumulate, g, append and { ask } or nil )
    end
end -- Unicode.merge()
function Unicode.romanNumbers( array, at )
    -- Check for possible roman numbers
    -- Precondition:
    --     array  -- table, with elements as sequence tables;
    --               first item of each element is a codepoint
    --     at     -- number, with position within array
    -- Postcondition:
    --     Returns number, which is identical or greater than at,
    --             to proceed; greater if a run of roman digits
    --             starts at that position, is preceded by a bef
    --             codepoint, and is followed by a fol codepoint
    --             or by the end of array
    local marks = Unicode.RomanN
    local r = at
    local e = array[ at ]
    if marks.dig[ e[ 1 ] ] and
       at > 1 and
       marks.bef[ array[ at - 1 ][ 1 ] ] then
        local total = #array
        local j = at
        -- advance as long as the next element is a digit, too
        while j < total do
            e = array[ j + 1 ]
            if not marks.dig[ e[ 1 ] ] then
                break -- while j
            end
            j = j + 1
        end -- while j
        if j == total or marks.fol[ e[ 1 ] ] then
            r = j + 1
        end
    end
    return r
end -- Unicode.romanNumbers()
function Unicode.showScripts( analysed )
    -- Retrieve codepoints and assigned script codes for string
    -- Precondition:
    --     analysed  -- table, as returned by Unicode.getScripts()
    -- Postcondition:
    --     Returns string, with every codepoint-script identified;
    --     every item is preceded by a space and shows
    --         hex-char-script  -- if a script code is assigned
    --         hex              -- if the detail is true
    --         hex-????         -- if nothing is assigned
    local parts = { }
    for i = 1, #analysed do
        local entry = analysed[ i ]
        local codepoint = entry[ 1 ]
        local detail = entry[ 2 ]
        local s = string.format( "%X", codepoint )
        if not detail then
            s = s .. "-????"
        elseif type( detail ) == "string" then
            s = string.format( "%s-%s-%s",
                               s,
                               mw.ustring.char( codepoint ),
                               detail )
        end
        parts[ i ] = s
    end -- for i
    if #parts == 0 then
        return ""
    end
    return " " .. table.concat( parts, " " )
end -- Unicode.showScripts()
function Unicode.sort( apply )
    -- Sort code ranges, ascending by their first element
    -- Precondition:
    --     apply  -- table, with sequence of range tables;
    --               sorted in place
    table.sort( apply,
                function ( lower, upper )
                    return lower[ 1 ] < upper[ 1 ]
                end )
end -- Unicode.sort()
function Unicode.testScripts( assume, analyse )
    -- Check whether all chars match script
    -- Precondition:
    --     assume   -- string, with expected script code
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns
    --         1. number, of chars matching assume
    --         2. number, of chars violating assume; these are
    --            "Latn" chars which are not part of what
    --            Unicode.romanNumbers() accepts as roman number
    local matching = 0
    local violating = 0
    local common = { }
    Unicode.merge( common, ISO15924.fetch( "unicodes" ), "*" )
    local cp = Unicode.getScripts( common, analyse, assume )
    local i = 1
    while i <= #cp do
        local script = cp[ i ][ 2 ]
        if type( script ) == "string" then
            if script == assume then
                matching = matching + 1
            elseif script == "Latn" then
                local j = Unicode.romanNumbers( cp, i )
                if j > i then
                    -- roman number: continue at the returned position
                    i = j
                else
                    violating = violating + 1
                end
            end
        end
        i = i + 1
    end -- while i
    return matching, violating
end -- Unicode.testScripts()
function ISO15924.fetch( access, alert )
    -- Fetch mw.loadData component
    -- Precondition:
    --     access  -- table name
    --     alert   -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns table; empty if nothing suitable has been found.
    --     The result is kept as ISO15924[ access ] for later calls.
    local r = ISO15924[ access ]
    if type( r ) == "table" then
        return r
    end
    if not ISO15924.config then
        -- the placeholder keeps the nested call from getting here again
        ISO15924.config = true
        ISO15924.fetch( "config", alert )    -- self
        if ISO15924.config.live then
            ISO15924.statics = "commons"
        end
    end

    -- names listed in Commons share the subpage named by statics;
    -- any other name is a subpage of its own
    local sub
    for i = 1, #ISO15924.Commons do
        if ISO15924.Commons[ i ] == access then
            sub = ISO15924.statics
            break -- for i
        end
    end -- for i
    sub = sub or access

    ISO15924.loadData = ISO15924.loadData or { }
    local ext = ISO15924.loadData[ sub ]
    if not ext then
        ext = foreignModule( ISO15924.suite,
                             false,
                             sub,
                             ISO15924.item,
                             alert )
        ISO15924.loadData[ sub ] = ext
    end

    if type( ext ) == "table" then
        if type( ext[ access ] ) == "table" then
            r = ext[ access ]
        elseif sub == "config" then
            r = ext
        end
    end
    if type( r ) ~= "table" then
        r = { }
    end
    ISO15924[ access ] = r
    return r
end -- ISO15924.fetch()
ISO15924.getLanguageScript = function ( ask )
    -- Retrieve primary script for language
    -- Precondition:
    --     ask  -- string, with language code
    -- Postcondition:
    --     Returns string, with associated script code;
    --             "Latn" if nothing else could be determined
    local r
    if type( ask ) == "string" then
        local n = #ask
        if n == 7 or n == 8 then
            -- language-Script: the script subtag is the answer
            r = ask:match( "^%a%a%a?%-(%a%a%a%a)$" )
            if r then
                r = r:sub( 1, 1 ):upper() .. r:sub( 2 ):lower()
            end
        end
        if not r then
            local written = ISO15924.fetch( "iso639script" )
            if type( written ) == "table" then
                local s = ask:lower()
                r = written[ s ]
                if r == nil and n > 3 then
                    -- Fall back to the leading language part.
                    -- This used to be skipped for tags of 7 or 8
                    -- characters without script subtag, such as
                    -- "be-x-old", which then always gave "Latn".
                    s = s:match( "^(%a%a%a?)%-" )
                    if s then
                        r = written[ s ]
                    end
                end
                if type( r ) == "table" then
                    r = r[ 1 ]
                end
            end
        end
    end
    return r or "Latn"
end -- ISO15924.getLanguageScript()
function ISO15924.getScripts( analyse )
    -- Retrieve codepoints and assigned script codes
    -- Precondition:
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns table, with all relations codepoint / scripts
    local common = { }
    -- ranges registered as "*" are passed as permitted unspecific ones
    Unicode.merge( common, ISO15924.fetch( "unicodes" ), "*" )
    return Unicode.getScripts( common, analyse, false )
end -- ISO15924.getScripts()
function ISO15924.isCJK( ask )
    -- Check whether script is Chinese-Japanese-Korean (CJK)
    -- Precondition:
    --     ask  -- string, with language or script code
    -- Postcondition:
    --     Returns true, if CJK
    return fulfil( ask, "cjk" )
end -- ISO15924.isCJK()
function ISO15924.isRTL( ask )
    -- Check whether script is right-to-left
    -- Precondition:
    --     ask  -- string, with language or script code
    -- Postcondition:
    --     Returns true, if right-to-left
    return fulfil( ask, "rtl" )
end -- ISO15924.isRTL()
function ISO15924.isScript( assume, analyse )
    -- Check all chars for expected script code ranges
    -- Precondition:
    --     assume   -- string, with expected script code
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns
    --         1. true, if all chars within
    --         2. analyse as table
    local definitions = ISO15924.fetch( "unicodes" )
    return Unicode.isScript( definitions, assume, analyse )
end -- ISO15924.isScript()
ISO15924.isTrans = function ( ask, assign, about )
    -- Check whether valid transcription for context
    -- Precondition:
    --     ask     -- string, with transcription key
    --     assign  -- string, with language or scripting code
    --     about   -- string or nil, with site scripting code
    -- Postcondition:
    --     Returns boolean
    -- A second declaration "local r, trsl" used to shadow the
    -- initial false, so nil was returned instead of a boolean;
    -- the other shadowed locals were never used.
    local r = false
    if type( ask ) == "string" then
        local trans = ISO15924.fetch( "trans" )
        local t = trans[ assign ]
        if type( t ) == "table" then
            for k, v in pairs( t ) do
                if v == ask then
                    r = true
                    break -- for k, v
                end
            end -- for k, v
        end
    end
    if not r and about == "Latn" then
        -- these two keys are accepted for any code on a "Latn" site
        r = ( ask == "BGN-PCGN" or ask == "ALA-LC" )
    end
    return r
end -- ISO15924.isTrans()
function ISO15924.scriptName( assigned, alien, add )
    -- Retrieve script name, hopefully linked
    -- Precondition:
    --     assigned  -- string, with script code
    --     alien     -- string, with language code, or not
    --     add       -- arbitrary additional information
    -- Postcondition:
    --     Returns string, as delivered by ISO15924.Text.scriptName()
    return ISO15924.Text.scriptName( assigned, alien, add )
end -- ISO15924.scriptName()
function ISO15924.showScripts( analyse )
    -- Retrieve codepoints and assigned script codes for and as string
    -- Precondition:
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns string, with every codepoint-script identified
    local common = { }
    Unicode.merge( common, ISO15924.fetch( "unicodes" ), "*" )
    return Unicode.showScripts( Unicode.getScripts( common,
                                                    analyse,
                                                    false ) )
end -- ISO15924.showScripts()
function ISO15924.testScripts( assume, analyse )
    -- Check whether all chars match script
    -- Precondition:
    --     assume   -- string, with expected script code
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns
    --         1. number, of chars matching assume
    --         2. number, of chars violating assume
    return Unicode.testScripts( assume, analyse )
end -- ISO15924.testScripts()
function Failsafe.failsafe( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or "wikidata" or "~"
    --                 or false
    -- Postcondition:
    --     Returns  string  -- with queried version, also if problem
    --              false   -- if appropriate
    local last = ( atleast == "~" )
    local since = atleast
    local r
    if last or atleast == "wikidata" then
        -- version as registered at the item, if available
        since = false
        local item = Failsafe.item
        if type( item ) == "number" and item > 0 then
            local entity = mw.wikibase.getEntity( string.format( "Q%d",
                                                                 item ) )
            if type( entity ) == "table" then
                local seek = Failsafe.serialProperty or "P348"
                local vsn = entity:formatPropertyValues( seek )
                if type( vsn ) == "table" and
                   type( vsn.value ) == "string" and
                   vsn.value ~= "" then
                    if last and vsn.value == Failsafe.serial then
                        -- "~": registered version equals local one
                        r = false
                    else
                        r = vsn.value
                    end
                end
            end
        end
    end
    if r == nil then
        -- local version, unless a later one has been requested
        if since and not ( since <= Failsafe.serial ) then
            r = false
        else
            r = Failsafe.serial
        end
    end
    return r
end -- Failsafe.failsafe()
-- Export: table of the functions offered to template programming
local p = {}
function p.getLanguageScript( frame )
    -- Retrieve primary script for language
    --     1  -- string, with language code
    -- Returns string, with script code
    local code = mw.text.trim( frame.args[ 1 ] or "" )
    return ISO15924.getLanguageScript( code )
end -- p.getLanguageScript
function p.isCJK( frame )
    -- Check whether script is CJK
    --     1  -- string, with language or script code
    -- Returns "1" if so, else empty string
    local code = mw.text.trim( frame.args[ 1 ] or "" )
    if ISO15924.isCJK( code ) then
        return "1"
    end
    return ""
end -- p.isCJK()
function p.isRTL( frame )
    -- Check whether script is right-to-left
    --     1  -- string, with language or script code
    -- Returns "1" if so, else empty string
    local code = mw.text.trim( frame.args[ 1 ] or "" )
    if ISO15924.isRTL( code ) then
        return "1"
    end
    return ""
end -- p.isRTL()
function p.isScript( frame )
    -- Check all chars for expected script code ranges
    --     1  -- string, with expected script code
    --     2  -- string, with text
    -- Returns "1" if all chars within, else empty string
    local script = mw.text.trim( frame.args[ 1 ] or "" )
    local text = mw.text.trim( frame.args[ 2 ] or "" )
    -- only the first result of isScript() is of interest here
    local within = ISO15924.isScript( script, text )
    if within then
        return "1"
    end
    return ""
end -- p.isScript
function p.isTrans( frame )
    -- Check whether valid transcription for context
    --     1     -- string, with transcription key
    --     2     -- string, with language or scripting code
    --     site  -- string or nil, with site scripting code
    -- Returns "1" if valid, else empty string
    local args = frame.args
    local key = mw.text.trim( args[ 1 ] or "" )
    local code = mw.text.trim( args[ 2 ] or "" )
    local site = mw.text.trim( args.site or "" )
    if ISO15924.isTrans( key, code, site ) then
        return "1"
    end
    return ""
end -- p.isTrans
function p.scriptName( frame )
    -- Retrieve script name, hopefully linked
    --     1     -- string, with script code
    --     2     -- string, with additional information
    --     lang  -- string, with language code
    -- Returns string
    local args = frame.args
    local script = mw.text.trim( args[ 1 ] or "" )
    local extra = mw.text.trim( args[ 2 ] or "" )
    local slang = mw.text.trim( args.lang or "" )
    return ISO15924.Text.scriptName( script, slang, extra )
end -- p.scriptName
function p.showScripts( frame )
    -- Retrieve codepoints and assigned script codes as string
    --     1  -- string, with text
    -- Returns string; empty if parameter 1 is not provided
    local text = frame.args[ 1 ]
    if not text then
        return ""
    end
    return ISO15924.showScripts( mw.text.trim( text ) )
end -- p.showScripts
function p.failsafe( frame )
    -- Versioning interface
    --     frame  -- table, with args[ 1 ] as requested version,
    --               or string, with requested version
    -- Returns string, with version; empty if Failsafe.failsafe()
    --         yields false
    local kind = type( frame )
    local since
    if kind == "table" then
        since = frame.args[ 1 ]
    elseif kind == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            -- empty request is treated like no request
            since = false
        end
    end
    return Failsafe.failsafe( since ) or ""
end -- p.failsafe()
function p.ISO15924()
    -- Provide the library table to other Lua modules
    -- Returns table, with the ISO15924 functions
    return ISO15924
end -- p.ISO15924
-- Result of this module: the table of exported functions
return p