Modul:Text: Unterschied zwischen den Versionen

Aus FreeWiki
Zur Navigation springen Zur Suche springen
te>Umherirrender
K (export concatParams and allow format string)
te>PerfektesChaos
(+ ucfirstAll, update uprightNonlatin)
Zeile 1: Zeile 1:
--[=[ 2014-02-16
+
--[=[ 2014-09-27
 
Text utilities
 
Text utilities
 
]=]
 
]=]
Zeile 103: Zeile 103:
 
     return r
 
     return r
 
end -- Text.sentenceTerminated()
 
end -- Text.sentenceTerminated()
 +
 +
 +
 +
Text.ucfirstAll = function ( adjust )
 +
    -- Capitalize all words
 +
    -- Precondition:
 +
    --    adjust  -- string
 +
    -- Returns: string with all first letters in upper case
 +
    local r = " " .. adjust
 +
    local i = 1
 +
    local c, j, m
 +
    if adjust:find( "&" ) then
 +
        r = r:gsub( "&",      "&" )
 +
            :gsub( "<",      "<" )
 +
            :gsub( ">",      ">" )
 +
            :gsub( " ",    " " )
 +
            :gsub( " ", " " )
 +
            :gsub( "‌",  "‌" )
 +
            :gsub( "‍",    "‍" )
 +
            :gsub( "‎",    "‎" )
 +
            :gsub( "‏",    "‏" )
 +
        m = true
 +
    end
 +
    while i do
 +
        i = mw.ustring.find( r, "%W%l", i )
 +
        if i then
 +
            j = i + 1
 +
            c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
 +
            r = string.format( "%s%s%s",
 +
                              mw.ustring.sub( r, 1, i ),
 +
                              c,
 +
                              mw.ustring.sub( r, i + 2 ) )
 +
            i = j
 +
        end
 +
    end -- while i
 +
    r = r:sub( 2 )
 +
    if m then
 +
        r = r:gsub(    "&", "&" )
 +
            :gsub(    "<", "<" )
 +
            :gsub(    ">", ">" )
 +
            :gsub(    " ", " " )
 +
            :gsub(  " ", " " )
 +
            :gsub(  "‌", "‌" )
 +
            :gsub(  "‍", "‍" )
 +
            :gsub(  "‎", "‎" )
 +
            :gsub(  "‏", "‏" )
 +
            :gsub( "&#X(%x+);", "&#x%1;" )
 +
    end
 +
    return r
 +
end -- Text.ucfirstAll()
  
  
Zeile 132: Zeile 182:
 
                 -- isLatin
 
                 -- isLatin
 
                 return  a <= 591  or  ( a >= 8194  and  a <= 8250 )
 
                 return  a <= 591  or  ( a >= 8194  and  a <= 8250 )
        end -- flat()
+
              end -- flat()
 
         local form = function ( a )
 
         local form = function ( a )
 
                 return string.format( span,
 
                 return string.format( span,
Zeile 138: Zeile 188:
 
                                       mw.ustring.sub( adjust, k, j - 1 ),
 
                                       mw.ustring.sub( adjust, k, j - 1 ),
 
                                       mw.ustring.sub( adjust, j, a ) )
 
                                       mw.ustring.sub( adjust, j, a ) )
        end -- form()
+
              end -- form()
 
         r = ""
 
         r = ""
 
         for i = 1, n do
 
         for i = 1, n do
 
             c = mw.ustring.codepoint( adjust, i, i )
 
             c = mw.ustring.codepoint( adjust, i, i )
             if c > 64 then
+
             if c > 64 or  c == 38  or  c == 60 then   -- '&' '<'
 
                 if flat( c ) then
 
                 if flat( c ) then
 
                     if j then
 
                     if j then
Zeile 153: Zeile 203:
 
                         end
 
                         end
 
                         if j then
 
                         if j then
                             r = form( i - 1 )
+
                            local nx = i - 1
 +
                            local s  = ""
 +
                            for ix = nx, 1, -1 do
 +
                                c = mw.ustring.sub( adjust, ix, ix )
 +
                                if c == " "  or  c == "(" then
 +
                                    nx = nx - 1
 +
                                    s  = c .. s
 +
                                else
 +
                                    break -- for ix
 +
                                end
 +
                            end -- for ix
 +
                             r = form( nx ) .. s
 
                             j = false
 
                             j = false
 
                             k = i
 
                             k = i
Zeile 197: Zeile 258:
 
         args = frame.args
 
         args = frame.args
 
     end
 
     end
     return Text.concatParams( args, frame.args.separator, frame.args.format )
+
     return Text.concatParams( args,
 +
                              frame.args.separator,
 +
                              frame.args.format )
 
end
 
end
  
Zeile 221: Zeile 284:
 
function p.sentenceTerminated( frame )
 
function p.sentenceTerminated( frame )
 
     return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
 
     return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
 +
end
 +
 +
function p.ucfirstAll( frame )
 +
    return Text.ucfirstAll( frame.args[ 1 ] or "" )
 
end
 
end
  

Version vom 27. September 2014, 10:59 Uhr

--[=[ 2014-09-27 Text utilities ]=]


-- Module table and lazily built pattern caches (false = not built yet)
local Text              = { }
local patternCJK        = false
local patternLatin      = false
local patternTerminated = false


Text.concatParams = function ( args, apply, adapt )

   -- Concat list items into one string
   -- Parameter:
   --     args   -- table with numKey=string; non-numeric keys are ignored
   --     apply  -- string (optional); separator (default: "|")
   --     adapt  -- string (optional); format including "%s"
   -- Returns: string; trimmed, non-empty items joined in ascending
   --          order of their numeric keys
   local order   = { }
   local collect = { }
   local v
   for k in pairs( args ) do
       if type( k ) == "number" then
           table.insert( order, k )
       end
   end
   -- pairs() does not guarantee any traversal order;
   -- sort the keys so the result is deterministic
   table.sort( order )
   for _, k in ipairs( order ) do
       v = mw.text.trim( args[ k ] )
       if v ~= "" then
           if adapt then
               v = mw.ustring.format( adapt, v )
           end
           table.insert( collect, v )
       end
   end
   return table.concat( collect,  apply or "|" )

end -- Text.concatParams()


Text.containsCJK = function ( analyse )

   -- Check whether at least one CJK character occurs
   -- Parameter:
   --     analyse  -- string
   -- Returns: true, if CJK detected; false otherwise
   if not patternCJK then
       -- Build the character class once and keep it for later calls:
       -- "[" 13312 "-" 40959  131072 "-" 178207 "]"
       local open, range, close = 91, 45, 93
       patternCJK = mw.ustring.char( open,
                                     13312,  range, 40959,
                                     131072, range, 178207,
                                     close )
   end
   if mw.ustring.find( analyse, patternCJK ) then
       return true
   end
   return false

end -- Text.containsCJK()


Text.listToText = function ( args, adapt )

   -- Format list items similar to mw.text.listToText()
   -- Parameter:
   --     args   -- table with numKey=string; non-numeric keys are ignored
   --     adapt  -- string (optional); format including "%s"
   -- Returns: string; trimmed, non-empty items listed in ascending
   --          order of their numeric keys
   local order   = { }
   local collect = { }
   local v
   for k in pairs( args ) do
       if type( k ) == "number" then
           table.insert( order, k )
       end
   end
   -- pairs() does not guarantee any traversal order;
   -- sort the keys so the enumeration reads in parameter order
   table.sort( order )
   for _, k in ipairs( order ) do
       v = mw.text.trim( args[ k ] )
       if v ~= "" then
           if adapt then
               v = mw.ustring.format( adapt, v )
           end
           table.insert( collect, v )
       end
   end
   return mw.text.listToText( collect )

end -- Text.listToText()


Text.sentenceTerminated = function ( analyse )

   -- Does the string end with a dot, question or exclamation mark?
   --     Trailing quotation marks, closing brackets etc. are tolerated
   -- Parameter:
   --     analyse  -- string
   -- Returns: true, if sentence terminated; false otherwise
   if not patternTerminated then
       -- Built once: opening "[" plus four non-ASCII terminators given
       -- by code point, then the ASCII terminators and the set of
       -- characters permitted after the terminator up to end of string
       local lead = mw.ustring.char( 91, 12290, 65281, 65294, 65311 )
       patternTerminated = lead .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
   end
   if mw.ustring.find( analyse, patternTerminated ) then
       return true
   end
   return false

end -- Text.sentenceTerminated()


Text.ucfirstAll = function ( adjust )

   -- Capitalize all words
   -- Precondition:
   --     adjust  -- string
   -- Returns: string with all first letters in upper case
   -- Common named entities are swapped for numeric ones while
   -- capitalizing, since "&nbsp;" must not be turned into "&Nbsp;";
   -- they are swapped back to the named form afterwards.
   local r = " " .. adjust    -- leading space: first word matches %W%l
   local i = 1
   local c, j, m
   if adjust:find( "&", 1, true ) then
       local entities = { { "&amp;",    "&#38;"   },
                          { "&lt;",     "&#60;"   },
                          { "&gt;",     "&#62;"   },
                          { "&nbsp;",   "&#160;"  },
                          { "&thinsp;", "&#8201;" },
                          { "&zwnj;",   "&#8204;" },
                          { "&zwj;",    "&#8205;" },
                          { "&lrm;",    "&#8206;" },
                          { "&rlm;",    "&#8207;" } }
       for _, e in ipairs( entities ) do
           r = r:gsub( e[ 1 ], e[ 2 ] )
       end
       m = entities
   end
   while i do
       -- non-word character followed by a lower case letter
       i = mw.ustring.find( r, "%W%l", i )
       if i then
           j = i + 1
           c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
           r = string.format( "%s%s%s",
                              mw.ustring.sub( r, 1, i ),
                              c,
                              mw.ustring.sub( r, i + 2 ) )
           i = j
       end
   end -- while i
   r = r:sub( 2 )    -- drop the leading space again
   if m then
       for _, e in ipairs( m ) do
           r = r:gsub( e[ 2 ], e[ 1 ] )
       end
       -- the "x" of a hexadecimal entity has been capitalized above
       r = r:gsub( "&#X(%x+);", "&#x%1;" )
   end
   return r

end -- Text.ucfirstAll()


Text.uprightNonlatin = function ( adjust )

   -- Ensure non-italics for non-latin text parts
   --     One single greek letter might be granted
   -- Precondition:
   --     adjust  -- string
   -- Returns: string; if adjust is not latin-only, it is rebuilt with
   --          each detected non-latin part passed through the third
   --          slot of the `span` format
   -- NOTE(review): `span` is "%s%s%s" here, which adds no markup around
   --     the non-latin part; the format presumably carried an enclosing
   --     element at some point -- confirm against the intended version.
   local r
   if not patternLatin then
       -- Built once: "^[" 7 "-" 591  8194 "-" 8250 "]*$"
       patternLatin = mw.ustring.char(   94, 91,
                                          7, 45,  591,
                                       8194, 45, 8250,
                                         93, 42, 36 )
   end
   if mw.ustring.match( adjust, patternLatin ) then
       -- latin only, horizontal dashes, quotes
       r = adjust
   else
       local c
       -- j: start index of the currently open non-latin part, or false
       local j    = false
       -- k: start index of the text not yet copied into r
       local k    = 1
       -- m: index at which a latin character must follow for the open
       --    part to count as a single greek letter, or false
       local m    = false
       local n    = mw.ustring.len( adjust )
       local span = "%s%s%s"
       local flat = function ( a )
               -- isLatin
               return  a <= 591   or   ( a >= 8194  and  a <= 8250 )
             end -- flat()
       -- form( a ): r so far, then the pending text k .. j-1,
       --            then the non-latin part j .. a
       local form = function ( a )
               return string.format( span,
                                     r,
                                     mw.ustring.sub( adjust, k, j - 1 ),
                                     mw.ustring.sub( adjust, j, a ) )
             end -- form()
       r = ""
       for i = 1, n do
           c = mw.ustring.codepoint( adjust, i, i )
           -- code points up to 64 other than '&' and '<' neither open
           -- nor close a part; they only advance m
           if c > 64  or  c == 38  or  c == 60 then    -- '&' '<'
               if flat( c ) then
                   if j then
                       if m then
                           if i == m then
                               -- single greek letter.
                               j = false
                           end
                           m = false
                       end
                       if j then
                           -- close the open part before position i, but
                           -- keep trailing spaces and "(" outside of it
                           local nx = i - 1
                           local s  = ""
                           for ix = nx, 1, -1 do
                               -- c is reused as a one-character string
                               -- here; it is reassigned from codepoint()
                               -- on the next iteration of the outer loop
                               c = mw.ustring.sub( adjust, ix, ix )
                               if c == " "  or  c == "(" then
                                   nx = nx - 1
                                   s  = c .. s
                               else
                                   break -- for ix
                               end
                           end -- for ix
                           r = form( nx ) .. s
                           j = false
                           k = i
                       end
                   end
               elseif not j then
                   -- first non-latin character: open a new part
                   j = i
                   if c >= 880  and  c <= 1023 then
                       -- single greek letter?
                       m = i + 1
                   else
                       m = false
                   end
               end
           elseif m then
               m = m + 1
           end
       end -- for i
       -- a part still open at the end is closed at n, unless the greek
       -- letter marker m has reached n; otherwise append the remainder
       if j  and  ( not m  or  m < n ) then
           r = form( n )
       else
           r = r .. mw.ustring.sub( adjust, k )
       end
   end
   return r

end -- Text.uprightNonlatin()


-- Export
-- Table of the functions made available to #invoke; returned by the module
local p = { }

function p.concatParams( frame )

   -- #invoke entry for Text.concatParams()
   --     template=1   -- take the items from the parent frame
   --     separator    -- passed on as separator
   --     format       -- passed on as format string
   local origin  = frame
   local upwards = frame.args.template
   if type( upwards ) == "string" then
       upwards = ( mw.text.trim( upwards ) == "1" )
   end
   if upwards then
       origin = frame:getParent()
   end
   return Text.concatParams( origin.args,
                             frame.args.separator,
                             frame.args.format )

end

function p.containsCJK( frame )

   -- #invoke entry for Text.containsCJK()
   -- Returns: "1" if CJK detected in the first parameter, else ""
   local story = frame.args[ 1 ] or ""
   if Text.containsCJK( story ) then
       return "1"
   end
   return ""

end

function p.listToText( frame )

   -- #invoke entry for Text.listToText()
   --     template=1   -- take the items from the parent frame
   --     format       -- passed on as format string
   local origin  = frame
   local upwards = frame.args.template
   if type( upwards ) == "string" then
       upwards = ( mw.text.trim( upwards ) == "1" )
   end
   if upwards then
       origin = frame:getParent()
   end
   return Text.listToText( origin.args, frame.args.format )

end

function p.sentenceTerminated( frame )

   -- #invoke entry for Text.sentenceTerminated()
   -- Returns: "1" if the first parameter is terminated, else ""
   local story = frame.args[ 1 ] or ""
   if Text.sentenceTerminated( story ) then
       return "1"
   end
   return ""

end

function p.ucfirstAll( frame )

   -- #invoke entry for Text.ucfirstAll()
   --     1  -- text to be capitalized (default: empty string)
   local story = frame.args[ 1 ] or ""
   return Text.ucfirstAll( story )

end

function p.uprightNonlatin( frame )

   -- #invoke entry for Text.uprightNonlatin()
   --     1  -- text to be processed (default: empty string)
   local story = frame.args[ 1 ] or ""
   return Text.uprightNonlatin( story )

end

function p.Text()

   -- Access for other Lua modules
   -- Returns: the Text table with all library functions
   return Text

end -- p.Text

return p