Modul:Text: Unterschied zwischen den Versionen
te>Umherirrender K (export concatParams and allow format string) |
te>PerfektesChaos (+ ucfirstAll, update uprightNonlatin) |
||
Zeile 1: | Zeile 1: | ||
− | --[=[ 2014- | + | --[=[ 2014-09-27 |
Text utilities | Text utilities | ||
]=] | ]=] | ||
Zeile 103: | Zeile 103: | ||
return r | return r | ||
end -- Text.sentenceTerminated() | end -- Text.sentenceTerminated() | ||
+ | |||
+ | |||
+ | |||
+ | Text.ucfirstAll = function ( adjust ) | ||
+ | -- Capitalize all words | ||
+ | -- Precondition: | ||
+ | -- adjust -- string | ||
+ | -- Returns: string with all first letters in upper case | ||
+ | local r = " " .. adjust | ||
+ | local i = 1 | ||
+ | local c, j, m | ||
+ | if adjust:find( "&" ) then | ||
+ | r = r:gsub( "&", "&" ) | ||
+ | :gsub( "<", "<" ) | ||
+ | :gsub( ">", ">" ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( "‌", "‌" ) | ||
+ | :gsub( "‍", "‍" ) | ||
+ | :gsub( "‎", "‎" ) | ||
+ | :gsub( "‏", "‏" ) | ||
+ | m = true | ||
+ | end | ||
+ | while i do | ||
+ | i = mw.ustring.find( r, "%W%l", i ) | ||
+ | if i then | ||
+ | j = i + 1 | ||
+ | c = mw.ustring.upper( mw.ustring.sub( r, j, j ) ) | ||
+ | r = string.format( "%s%s%s", | ||
+ | mw.ustring.sub( r, 1, i ), | ||
+ | c, | ||
+ | mw.ustring.sub( r, i + 2 ) ) | ||
+ | i = j | ||
+ | end | ||
+ | end -- while i | ||
+ | r = r:sub( 2 ) | ||
+ | if m then | ||
+ | r = r:gsub( "&", "&" ) | ||
+ | :gsub( "<", "<" ) | ||
+ | :gsub( ">", ">" ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( "‌", "‌" ) | ||
+ | :gsub( "‍", "‍" ) | ||
+ | :gsub( "‎", "‎" ) | ||
+ | :gsub( "‏", "‏" ) | ||
+ | :gsub( "&#X(%x+);", "&#x%1;" ) | ||
+ | end | ||
+ | return r | ||
+ | end -- Text.ucfirstAll() | ||
Zeile 132: | Zeile 182: | ||
-- isLatin | -- isLatin | ||
return a <= 591 or ( a >= 8194 and a <= 8250 ) | return a <= 591 or ( a >= 8194 and a <= 8250 ) | ||
− | + | end -- flat() | |
local form = function ( a ) | local form = function ( a ) | ||
return string.format( span, | return string.format( span, | ||
Zeile 138: | Zeile 188: | ||
mw.ustring.sub( adjust, k, j - 1 ), | mw.ustring.sub( adjust, k, j - 1 ), | ||
mw.ustring.sub( adjust, j, a ) ) | mw.ustring.sub( adjust, j, a ) ) | ||
− | + | end -- form() | |
r = "" | r = "" | ||
for i = 1, n do | for i = 1, n do | ||
c = mw.ustring.codepoint( adjust, i, i ) | c = mw.ustring.codepoint( adjust, i, i ) | ||
− | if c > 64 then | + | if c > 64 or c == 38 or c == 60 then -- '&' '<' |
if flat( c ) then | if flat( c ) then | ||
if j then | if j then | ||
Zeile 153: | Zeile 203: | ||
end | end | ||
if j then | if j then | ||
− | r = form( | + | local nx = i - 1 |
+ | local s = "" | ||
+ | for ix = nx, 1, -1 do | ||
+ | c = mw.ustring.sub( adjust, ix, ix ) | ||
+ | if c == " " or c == "(" then | ||
+ | nx = nx - 1 | ||
+ | s = c .. s | ||
+ | else | ||
+ | break -- for ix | ||
+ | end | ||
+ | end -- for ix | ||
+ | r = form( nx ) .. s | ||
j = false | j = false | ||
k = i | k = i | ||
Zeile 197: | Zeile 258: | ||
args = frame.args | args = frame.args | ||
end | end | ||
− | return Text.concatParams( args, frame.args.separator, frame.args.format ) | + | return Text.concatParams( args, |
+ | frame.args.separator, | ||
+ | frame.args.format ) | ||
end | end | ||
Zeile 221: | Zeile 284: | ||
function p.sentenceTerminated( frame ) | function p.sentenceTerminated( frame ) | ||
return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" | return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" | ||
+ | end | ||
+ | |||
+ | function p.ucfirstAll( frame ) | ||
+ | return Text.ucfirstAll( frame.args[ 1 ] or "" ) | ||
end | end | ||
Version vom 27. September 2014, 10:59 Uhr
--[=[ 2014-09-27 Text utilities ]=]
-- Table that receives all library functions of this module.
local Text = { }
-- Pattern strings, built on first use by the function that needs them;
-- false means "not built yet".
local patternCJK, patternLatin, patternTerminated = false, false, false
Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table with numKey=string; entries with other keys
    --               are ignored
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s",
    --               applied to every non-empty item
    -- Returns: string; trimmed, non-empty items joined in ascending
    --          order of their numeric keys
    local keys = { }
    for k in pairs( args ) do
        if type( k ) == "number" then
            keys[ #keys + 1 ] = k
        end
    end
    -- pairs() gives no guarantee about traversal order, and the keys may
    -- have gaps; sorting them makes the result order deterministic.
    table.sort( keys )
    local collect = { }
    for _, k in ipairs( keys ) do
        local v = mw.text.trim( args[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end
    return table.concat( collect, apply or "|" )
end -- Text.concatParams()
Text.containsCJK = function ( analyse )
    -- Tell whether a string holds at least one CJK character
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected; false otherwise
    -- The pattern is a bracket set "[U+3400-U+9FFF U+20000-U+2B81F]",
    -- assembled from code points once and kept for later calls.
    patternCJK = patternCJK or mw.ustring.char( 91,
                                                 13312, 45, 40959,
                                                 131072, 45, 178207,
                                                 93 )
    return mw.ustring.find( analyse, patternCJK ) ~= nil
end -- Text.containsCJK()
Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table with numKey=string; entries with other keys
    --               are ignored
    --     adapt  -- string (optional); format including "%s",
    --               applied to every non-empty item
    -- Returns: string; trimmed, non-empty items in ascending order of
    --          their numeric keys, joined by mw.text.listToText()
    local keys = { }
    for k in pairs( args ) do
        if type( k ) == "number" then
            keys[ #keys + 1 ] = k
        end
    end
    -- pairs() gives no guarantee about traversal order, and the keys may
    -- have gaps; sorting them makes the result order deterministic.
    table.sort( keys )
    local collect = { }
    for _, k in ipairs( keys ) do
        local v = mw.text.trim( args[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end
    return mw.text.listToText( collect )
end -- Text.listToText()
Text.sentenceTerminated = function ( analyse )
    -- Does the string end with a dot, question or exclamation mark?
    -- Closing quotation marks, apostrophes and "]" may follow the mark.
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated; false otherwise
    if not patternTerminated then
        -- "[" followed by the ideographic full stop and the fullwidth
        -- forms of "!", "." and "?"; the literal part completes the set
        -- and adds the optional trailing characters up to the end.
        local lead = mw.ustring.char( 91, 12290, 65281, 65294, 65311 )
        patternTerminated = lead .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( analyse, patternTerminated ) ~= nil
end -- Text.sentenceTerminated()
Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    -- Character entities are protected: a named entity such as "&nbsp;"
    -- starts with a non-word character followed by a lowercase letter
    -- and would otherwise be turned into "&Nbsp;". Known named entities
    -- are therefore swapped for their numeric form (which has no letters)
    -- before capitalizing, and swapped back afterwards.
    local entities = {
        { "&amp;",    "&#38;"   },
        { "&lt;",     "&#60;"   },
        { "&gt;",     "&#62;"   },
        { "&nbsp;",   "&#160;"  },
        { "&thinsp;", "&#8201;" },
        { "&zwnj;",   "&#8204;" },
        { "&zwj;",    "&#8205;" },
        { "&lrm;",    "&#8206;" },
        { "&rlm;",    "&#8207;" }
    }
    -- Leading space: lets the first word match "non-word + lowercase".
    local r = " " .. adjust
    local m = false
    if adjust:find( "&", 1, true ) then
        for _, e in ipairs( entities ) do
            r = r:gsub( e[ 1 ], e[ 2 ] )
        end
        m = true
    end
    -- Single pass: uppercase every lowercase letter that directly
    -- follows a non-word character.
    r = mw.ustring.gsub( r,
                         "(%W)(%l)",
                         function ( before, letter )
                             return before .. mw.ustring.upper( letter )
                         end )
    -- Drop the leading space again (one byte).
    r = r:sub( 2 )
    if m then
        for _, e in ipairs( entities ) do
            r = r:gsub( e[ 2 ], e[ 1 ] )
        end
        -- The "x" of a hexadecimal entity follows "#" and was
        -- capitalized as well; undo that.
        r = r:gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()
Text.uprightNonlatin = function ( adjust )
-- Ensure non-italics for non-latin text parts
-- One single greek letter might be granted
-- Precondition:
-- adjust -- string
-- Returns: string in which every non-latin part has been passed through
-- the `span` format below; a string that is latin only is
-- returned unchanged.
-- NOTE(review): the original header ended with "enclosed in" and the
-- `span` format contains no markup, so as written the parts
-- are merely re-concatenated. Presumably an HTML element was
-- lost from both places -- confirm against the live module.
local r
if not patternLatin then
-- Built once: "^[" U+0007 "-" U+024F U+2002 "-" U+203A "]*$"
patternLatin = mw.ustring.char( 94, 91,
7, 45, 591,
8194, 45, 8250,
93, 42, 36 )
end
if mw.ustring.match( adjust, patternLatin ) then
-- latin only, horizontal dashes, quotes
r = adjust
else
-- c: code point of the current character (reused as a one-character
-- string inside the backward scan below)
-- j: index where the currently open non-latin run starts, or false
-- k: index of the first character not yet copied into r
-- m: index bookkeeping for the "single greek letter" exception, or false
-- n: length of adjust in characters
local c
local j = false
local k = 1
local m = false
local n = mw.ustring.len( adjust )
-- Format used by form(): result so far, plain text, non-latin run.
local span = "%s%s%s"
local flat = function ( a )
-- isLatin: code point inside one of the two ranges of patternLatin
-- (the lower bound 7 of the first range is not checked here)
return a <= 591 or ( a >= 8194 and a <= 8250 )
end -- flat()
local form = function ( a )
-- Append the plain text k..j-1 and the non-latin run j..a to r
-- and return the new result; reads r, j and k as upvalues.
return string.format( span,
r,
mw.ustring.sub( adjust, k, j - 1 ),
mw.ustring.sub( adjust, j, a ) )
end -- form()
r = ""
for i = 1, n do
c = mw.ustring.codepoint( adjust, i, i )
-- Code points up to 64 (digits, blanks, most ASCII punctuation) neither
-- open nor close a run; '&' and '<' are handled like latin letters.
if c > 64 or c == 38 or c == 60 then -- '&' '<'
if flat( c ) then
-- A latin character closes an open run.
if j then
if m then
if i == m then
-- single greek letter.
-- The run is dropped; k is left alone, so the letter stays
-- part of the plain text that is copied later.
j = false
end
m = false
end
if j then
-- Step back over blanks and "(" directly before position i so
-- that they end up behind the formatted run (in s) instead of
-- inside it.
local nx = i - 1
local s = ""
for ix = nx, 1, -1 do
c = mw.ustring.sub( adjust, ix, ix )
if c == " " or c == "(" then
nx = nx - 1
s = c .. s
else
break -- for ix
end
end -- for ix
r = form( nx ) .. s
j = false
k = i
end
end
elseif not j then
-- First non-latin character: open a run here.
j = i
if c >= 880 and c <= 1023 then
-- single greek letter?
-- Remember the index at which a latin character would have to
-- follow for the exception above to apply.
m = i + 1
else
m = false
end
end
elseif m then
-- Neutral character while the greek-letter check is pending:
-- shift the expected index by one.
m = m + 1
end
end -- for i
-- Close a run that is still open at the end of the string, unless the
-- greek-letter bookkeeping says otherwise; else append the remaining text.
-- NOTE(review): a greek letter followed by exactly one more non-latin
-- character at the very end gives m == n, so that run is not formatted
-- either -- confirm whether this is intended.
if j and ( not m or m < n ) then
r = form( n )
else
r = r .. mw.ustring.sub( adjust, k )
end
end
return r
end -- Text.uprightNonlatin()
-- Export
-- Table returned by this module; its functions take a frame object.
local p = { }
p.concatParams = function ( frame )
    -- Frame entry point for Text.concatParams()
    -- frame.args.template -- "1" (after trimming): read the items from
    --                        the parent frame instead of this frame
    -- frame.args.separator, frame.args.format -- passed on unchanged
    local useParent = frame.args.template
    if type( useParent ) == "string" then
        useParent = ( mw.text.trim( useParent ) == "1" )
    end
    local source
    if useParent then
        source = frame:getParent().args
    else
        source = frame.args
    end
    return Text.concatParams( source,
                              frame.args.separator,
                              frame.args.format )
end
p.containsCJK = function ( frame )
    -- Frame entry point for Text.containsCJK()
    -- Returns: "1" if frame.args[ 1 ] (default "") contains CJK, else ""
    local subject = frame.args[ 1 ] or ""
    if Text.containsCJK( subject ) then
        return "1"
    end
    return ""
end
p.listToText = function ( frame )
    -- Frame entry point for Text.listToText()
    -- frame.args.template -- "1" (after trimming): read the items from
    --                        the parent frame instead of this frame
    -- frame.args.format   -- passed on unchanged
    local useParent = frame.args.template
    if type( useParent ) == "string" then
        useParent = ( mw.text.trim( useParent ) == "1" )
    end
    local source
    if useParent then
        source = frame:getParent().args
    else
        source = frame.args
    end
    return Text.listToText( source, frame.args.format )
end
p.sentenceTerminated = function ( frame )
    -- Frame entry point for Text.sentenceTerminated()
    -- Returns: "1" if frame.args[ 1 ] (default "") is terminated, else ""
    local subject = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( subject ) then
        return "1"
    end
    return ""
end
p.ucfirstAll = function ( frame )
    -- Frame entry point for Text.ucfirstAll()
    -- Applies it to frame.args[ 1 ], or to "" if that is missing.
    local subject = frame.args[ 1 ] or ""
    return Text.ucfirstAll( subject )
end
p.uprightNonlatin = function ( frame )
    -- Frame entry point for Text.uprightNonlatin()
    -- Applies it to frame.args[ 1 ], or to "" if that is missing.
    local subject = frame.args[ 1 ] or ""
    return Text.uprightNonlatin( subject )
end
function p.Text()
    -- Hand out the library table so that other Lua modules can call
    -- the Text functions directly.
    return Text
end -- p.Text
return p