Modul:Text

Aus FreeWiki
Version vom 9. November 2013, 16:18 Uhr von te>PerfektesChaos (update)
Zur Navigation springen Zur Suche springen

--[=[ 2013-11-07 Text utilities ]=]


-- Library table; the Text.* functions defined below are attached to it.
local Text = { }

-- Caches for the ustring patterns. Each one stays false until the function
-- that needs it builds the pattern on its first call.
local patternCJK        = false
local patternLatin      = false
local patternTerminated = false


Text.containsCJK = function ( analyse )

   -- Is any CJK code within?
   -- Parameter:
   --     analyse  -- string
   -- Returns: true, if CJK detected; false otherwise
   if not patternCJK then
       -- Built once and cached: the character set
       -- "[U+3400-U+9FFF U+20000-U+2B81F]".
       -- 91, 45 and 93 are the code points of "[", "-" and "]".
       patternCJK = mw.ustring.char( 91,
                                      13312, 45,  40959,
                                     131072, 45, 178207,
                                     93 )
   end
   -- Return the boolean directly. No intermediate variable is used, so
   -- nothing can leak into the global environment by a missing "local".
   return mw.ustring.find( analyse, patternCJK ) ~= nil

end -- Text.containsCJK()


Text.sentenceTerminated = function ( analyse )

   -- Does the string end with a sentence terminator (dot, question mark,
   -- exclamation mark, ellipsis, or one of the code points listed below)?
   --     Any run of quotation marks, apostrophes or "]" after the
   --     terminator is tolerated.
   -- Parameter:
   --     analyse  -- string
   -- Returns: true, if sentence terminated; false otherwise
   if not patternTerminated then
       -- Opening "[" (91) plus U+3002, U+FF01, U+FF0E, U+FF1F;
       -- the literal part completes the set and adds the optional tail.
       local opening = mw.ustring.char( 91, 12290, 65281, 65294, 65311 )
       patternTerminated = opening .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
   end
   return ( mw.ustring.find( analyse, patternTerminated ) ~= nil )

end -- Text.sentenceTerminated()


Text.uprightNonlatin = function ( adjust )

   -- Ensure non-italics for non-latin text parts
   --     One single greek letter might be granted
   -- Precondition:
   --     adjust  -- string
   -- Returns: string with non-latin parts enclosed in <span>
   local r
   if not patternLatin then
       -- Built once and cached: "^[U+0007-U+024F U+2002-U+203A]*$"
       -- (94 "^", 91 "[", 45 "-", 93 "]", 42 "*", 36 "$").
       patternLatin = mw.ustring.char(   94, 91,
                                          7, 45,  591,
                                       8194, 45, 8250,
                                         93, 42, 36 )
   end
   if mw.ustring.match( adjust, patternLatin ) then
       -- latin only, horizontal dashes, quotes
       r = adjust
   else
       local c
       local j    = false    -- start index of the open non-latin run
       local k    = 1        -- start index of text not yet copied into r
       local m    = false    -- index at which a latin character proves
                             -- that the run was one single greek letter
       local n    = mw.ustring.len( adjust )
       -- NOTE(review): this copy of the module had the format "%s%s%s",
       -- which made the function return its input unchanged; the wrapper
       -- markup appears to have been stripped. Reconstructed here as an
       -- upright <span> -- confirm the exact attributes against upstream.
       local span = "%s%s<span style='font-style:normal'>%s</span>"
       local flat = function ( a )
               -- isLatin
               return  a <= 591   or   ( a >= 8194  and  a <= 8250 )
       end -- flat()
       local form = function ( a )
               -- Result so far, then the pending latin text k..j-1,
               -- then the non-latin run j..a inside the wrapper
               return string.format( span,
                                     r,
                                     mw.ustring.sub( adjust, k, j - 1 ),
                                     mw.ustring.sub( adjust, j, a ) )
       end -- form()
       r = ""
       for i = 1, n do
           c = mw.ustring.codepoint( adjust, i, i )
           if c > 32 then
               if flat( c ) then
                   if j then
                       if m then
                           if i == m then
                               -- single greek letter.
                               j = false
                           end
                           m = false
                       end
                       if j then
                           -- latin character closes the non-latin run
                           r = form( i - 1 )
                           j = false
                           k = i
                       end
                   end
               elseif not j then
                   -- first character of a new non-latin run
                   j = i
                   if c >= 880  and  c <= 1023 then
                       -- single greek letter?
                       m = i + 1
                   else
                       m = false
                   end
               end
           elseif m then
               -- space or control code: neither starts nor ends a run,
               -- but shifts the single-letter marker along
               m = m + 1
           end
       end -- for i
       -- m is false when the open run did not start with a greek letter;
       -- comparing false with a number would raise an error, so test it
       -- first. Such a run always gets wrapped.
       if j  and  ( not m  or  m < n ) then
           r = form( n )
       else
           r = r .. mw.ustring.sub( adjust, k )
       end
   end
   return r

end -- Text.uprightNonlatin()


-- Export
-- Table that collects the frame-callable wrappers and is returned by the module.
local p = { }

function p.containsCJK( frame )

   -- Frame wrapper around Text.containsCJK()
   -- Parameter:
   --     frame  -- object; frame.args[ 1 ] is the text (default: "")
   -- Returns: "1", if CJK detected; else ""
   local subject = frame.args[ 1 ] or ""
   if Text.containsCJK( subject ) then
       return "1"
   end
   return ""

end

function p.sentenceTerminated( frame )

   -- Frame wrapper around Text.sentenceTerminated()
   -- Parameter:
   --     frame  -- object; frame.args[ 1 ] is the text (default: "")
   -- Returns: "1", if sentence terminated; else ""
   local subject = frame.args[ 1 ] or ""
   local answer  = ""
   if Text.sentenceTerminated( subject ) then
       answer = "1"
   end
   return answer

end

function p.uprightNonlatin( frame )

   -- Frame wrapper around Text.uprightNonlatin()
   -- Parameter:
   --     frame  -- object; frame.args[ 1 ] is the text (default: "")
   -- Returns: whatever Text.uprightNonlatin() yields for that text
   local subject = frame.args[ 1 ]
   if not subject then
       subject = ""
   end
   return Text.uprightNonlatin( subject )

end

-- Returns the Text table itself (the library functions that yield
-- booleans and strings), as opposed to the frame wrappers above.
function p.Text()
   return Text
end -- p.Text

-- Module result: the export table holding the frame wrappers and p.Text.
return p