-- Modul:Text
-- Revision as of 5 November 2013, 23:27, by te>PerfektesChaos (Setup)
--[=[ 2013-11-05 Text utilities ]=]
-- Table collecting the text utility functions of this module.
local Text = { }

-- Lazily built ustring patterns; each stays false until the function
-- that needs it runs for the first time.
local patternCJK        = false
local patternLatin      = false
local patternTerminated = false
Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected; false otherwise
    if not patternCJK then
        -- Build the character class once and cache it in the upvalue:
        --     "["  U+3400 "-" U+9FFF  U+20000 "-" U+2B81F  "]"
        patternCJK = mw.ustring.char( 91,
                                      13312, 45, 40959,
                                      131072, 45, 178207,
                                      93 )
    end
    -- find() yields nil when nothing matches; compare against nil so
    -- that a real boolean is returned without any helper variable
    -- (the former unscoped "r" leaked into the global environment).
    return mw.ustring.find( analyse, patternCJK ) ~= nil
end -- Text.containsCJK()
Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated; false otherwise
    if not patternTerminated then
        -- Build the pattern once and cache it in the upvalue.
        -- First class: one terminating character, i.e.
        --     U+3002, U+FF01, U+FF0E, U+FF1F (built via char())
        --     or one of  ! . ? …
        -- Second class, repeated any number of times up to the end of
        -- the string: quotation marks, apostrophe and "]".
        patternTerminated = mw.ustring.char( 91,
                                             12290,
                                             65281,
                                             65294,
                                             65311 )
                            .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    -- find() yields nil when nothing matches; turn that into a boolean.
    return mw.ustring.find( analyse, patternTerminated ) ~= nil
end -- Text.sentenceTerminated()
Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    if not patternLatin then
        -- Build the pattern once and cache it in the upvalue:
        --     "^["  U+0007 "-" U+024F  U+2002 "-" U+203A  "]*$"
        patternLatin = mw.ustring.char( 94, 91,
                                        7, 45, 591,
                                        8194, 45, 8250,
                                        93, 42, 36 )
    end
    if mw.ustring.match( adjust, patternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c            -- code point at the current position
        local j = false    -- start index of the open non-latin run
        local k = 1        -- start index of text not yet copied into r
        local m = false    -- index at which a latin character proves
                           -- that the run was one single greek letter
        local n = mw.ustring.len( adjust )
        -- NOTE(review): the reviewed copy had "%s%s%s" here, which made
        -- the function return its input unchanged and contradicted the
        -- documented contract; the markup appears to have been stripped.
        -- Confirm the exact attributes against the upstream module.
        local span = "%s%s<span style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  return  a <= 591  or  ( a >= 8194  and  a <= 8250 )
              end -- flat()
        local form = function ( a )
                  -- Append the pending latin text k..j-1 unchanged and
                  -- the non-latin run j..a wrapped by the span format.
                  return string.format( span,
                                        r,
                                        mw.ustring.sub( adjust, k, j - 1 ),
                                        mw.ustring.sub( adjust, j, a ) )
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if c > 32 then
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- A latin character closes the open run.
                            r = form( i - 1 )
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    -- First non-latin character: open a new run.
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                -- Code points up to 32 neither open nor close a run;
                -- they only shift the single-greek-letter position.
                m = m + 1
            end
        end -- for i
        if j then
            -- The string ends inside a non-latin run.
            r = form( n )
        else
            -- Copy the remaining latin text.
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()
-- Export
-- The declaration must sit on its own line: when it shares the line
-- with the comment it is commented out and p stays undefined.
local p = { }
function p.containsCJK( frame )
    -- Frame-based entry point for Text.containsCJK()
    -- Parameter:
    --     frame  -- object; frame.args[ 1 ] is the text, "" if missing
    -- Returns: "1" if Text.containsCJK() yields a true value, else ""
    local subject = frame.args[ 1 ] or ""
    if Text.containsCJK( subject ) then
        return "1"
    end
    return ""
end
function p.sentenceTerminated( frame )
    -- Frame-based entry point for Text.sentenceTerminated()
    -- Parameter:
    --     frame  -- object; frame.args[ 1 ] is the text, "" if missing
    -- Returns: "1" if Text.sentenceTerminated() yields a true value,
    --          else ""
    local subject = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( subject ) then
        return "1"
    end
    return ""
end
function p.uprightNonlatin( frame )
    -- Frame-based entry point for Text.uprightNonlatin()
    -- Parameter:
    --     frame  -- object; frame.args[ 1 ] is the text, "" if missing
    -- Returns: whatever Text.uprightNonlatin() returns for that text
    local subject = frame.args[ 1 ] or ""
    return Text.uprightNonlatin( subject )
end
function p.Text()
    -- Accessor handing out the Text function table itself
    -- Returns: table Text
    return Text
end -- p.Text
-- Module result: the export table.
return p