Jump to content

Module:TaxonItalics

Permanently protected module
From Wikipedia, the free encyclopedia

--[[=========================================================================
Italicize a taxon name appropriately by invoking italicizeTaxonName.
The algorithm used is:
* If the name has italic markup at the start or the end, do nothing.
* Else
  * Remove (internal) italic markup.
  * If the name is made up of four words and the third word is a
    botanical connecting term, de-italicize the connecting term and add italic
    markup to the outside of the name.
  * Else if the name is made up of three words and the second word is a
    botanical connecting term or a variant of "cf.", de-italicize the
    connecting term and add italic markup to the outside of the name.
  * Else just add italic markup to the outside of the name.
 The module also:
 * Ensures that the hybrid symbol, ×, and parentheses are not italicized, as
   well as any string inside parentheses if dab is true.
 * Has an option to abbreviate all parts of taxon names other than the last
   to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
   "P. s. var. sylvestris").
 * Has an option to wikilink the italicized name to the input name.
=============================================================================]]

local p = {} -- holds the functions this module exports (returned at the end of the file)
local l = {} -- used to store purely local functions

-- Connecting terms recognized as the third word of a four-word name
-- (e.g. Pinus sylvestris var. sylvestris). Every accepted spelling is a key;
-- its value is the standard abbreviation used in the output.
local cTerms3 = {
    -- subsp.
    ["subspecies"] = "subsp.", ["subsp."] = "subsp.", ["subsp"] = "subsp.",
    ["ssp."] = "subsp.", ["ssp"] = "subsp.",
    -- var.
    ["varietas"] = "var.", ["var."] = "var.", ["var"] = "var.",
    -- subvar.
    ["subvarietas"] = "subvar.", ["subvar."] = "subvar.", ["subvar"] = "subvar.",
    -- f.
    ["forma"] = "f.", ["f."] = "f.", ["f"] = "f.",
    -- subf.
    ["subforma"] = "subf.", ["subf."] = "subf.", ["subf"] = "subf.",
}
-- Connecting terms recognized as the second word of a three-word name
-- (e.g. Pinus sect. Pinus), plus the variants of "cf.". Every accepted
-- spelling is a key; its value is the standard abbreviation used in the output.
local cTerms2 = {
    -- subg.
    ["subgenus"] = "subg.", ["subgen."] = "subg.", ["subg."] = "subg.", ["subg"] = "subg.",
    -- supersect.
    ["supersection"] = "supersect.", ["supersect."] = "supersect.", ["supersect"] = "supersect.",
    -- sect.
    ["section"] = "sect.", ["sect."] = "sect.", ["sect"] = "sect.",
    -- subsect.
    ["subsection"] = "subsect.", ["subsect."] = "subsect.", ["subsect"] = "subsect.",
    -- ser.
    ["series"] = "ser.", ["ser."] = "ser.", ["ser"] = "ser.",
    -- subser.
    ["subseries"] = "subser.", ["subser."] = "subser.", ["subser"] = "subser.",
    -- cf.
    ["cf"] = "cf.", ["cf."] = "cf.", ["c.f."] = "cf.",
}

--[[=========================================================================
Main function to italicize a taxon name appropriately.
The name is taken from the first unnamed argument in frame.args (an empty
string is used if it is absent). The options 'linked', 'abbreviated' and
'dab' are switched on only by the exact value 'yes'. For the purpose of the
parameters, see p.italicizeTaxonName().
=============================================================================]]
function p.main(frame)
    local args = frame.args
    -- an option is true only when it was given exactly as 'yes'
    local function isYes(key)
        return args[key] == 'yes'
    end
    return p.italicizeTaxonName(
        args[1] or '',
        isYes('linked'),
        isYes('abbreviated'),
        isYes('dab')
    )
end

--[[=========================================================================
Utility local function to abbreviate an input string to its first character
followed by ".".
Parameter:
 str (string) – the word to be abbreviated
Returns the abbreviated string. Strings of fewer than two characters are
returned unchanged.
An opening parenthesis or an opening " at the start of the string is kept and
causes the matching closing character to be appended after the ".".
A hybrid symbol "×" and then an HTML entity (text from "&" up to and including
the next ";") at the start of the string are kept and skipped over in
determining the first character. An "&" with no following ";" is not an
entity and is treated as the first character.
=============================================================================]]
function l.abbreviate(str)
	--single character (and empty) strings are left unchanged
	if mw.ustring.len(str) < 2 then
		return str
	end
	local result = ""
	local closer = "" -- closing character to append after the abbreviation
	--skip over an opening parenthesis or quote that could be present at the
	--start of the string, remembering which closing character is needed
	local first = mw.ustring.sub(str, 1, 1)
	if first == "(" then
		result, closer = "(", ")"
		str = mw.ustring.sub(str, 2)
	elseif first == '"' then
		result, closer = '"', '"'
		str = mw.ustring.sub(str, 2)
	end
	--skip over a hybrid symbol that could be present at the start of the string;
	--append rather than assign so that an opening "(" or '"' is not lost
	if mw.ustring.sub(str, 1, 1) == "×" then
		result = result .. "×"
		str = mw.ustring.sub(str, 2)
	end
	--skip over an HTML entity that could be present at the start of the string
	if mw.ustring.sub(str, 1, 1) == "&" then
		--plain search for the terminating ";"
		local entityEnd = mw.ustring.find(str, ";", 2, true)
		--only treat the text as an entity if it is terminated; otherwise "&"
		--is handled below like any other first character
		if entityEnd then
			result = result .. mw.ustring.sub(str, 1, entityEnd)
			str = mw.ustring.sub(str, entityEnd + 1)
		end
	end
	--if there's anything left, reduce it to its first character plus ".",
	--adding the closing parenthesis or quote if required
	if str ~= "" then
		result = result .. mw.ustring.sub(str, 1, 1) .. "." .. closer
	end
	return result
end

--[[=========================================================================
The function which does the italicization. Parameters:
 name (string) – the taxon name to be processed
 linked (boolean) – should a wikilink be generated?
 abbreviated (boolean) – should all parts of the taxon name other than the
   last be abbreviated to their first letter followed by "."? (Connecting
   terms are not abbreviated in this way.)
 dab (boolean) – should any parenthesized part be treated as a disambiguation
   term and left unitalicized?
Returns the processed name as a string. A name that begins with "[" is
returned unchanged apart from trimming. A name that is empty, or that already
has italic markup at its start or end, is returned after normalization only
(HTML italic tags replaced by '', hybrid-symbol entities replaced by ×, span
markup removed).
=============================================================================]]
function p.italicizeTaxonName(name, linked, abbreviated, dab)
    local italMarker = "''"
    local normalOpen = '<span style="font-style:normal;">'
    local normalClose = '</span>'

    name = mw.text.trim(name)
    -- if the name begins with '[', then assume formatting is present
    if mw.ustring.sub(name, 1, 1) == '[' then return name end

    -- replace any use of the HTML italic tags by Wikimedia markup
    name = string.gsub(name, "</?i>", italMarker)
    -- replace any entity alternatives to the hybrid symbol by the symbol itself
    name = string.gsub(name, "&#215;", "×")
    name = string.gsub(name, "&times;", "×")
    -- remove any span markup
    name = string.gsub(name, "</?span.->", "")
    -- prevent the hybrid symbol being treated as a 'word' by converting a
    -- following space to the HTML entity
    name = string.gsub(name, "× ", "×&#32;")

    -- nothing more to do for an empty name
    if name == '' then return name end
    -- do nothing if the name already has italic markers at the start or end
    if string.sub(name, 1, 2) == italMarker or string.sub(name, -2) == italMarker then
        return name
    end

    -- first remove any internal italics
    name = string.gsub(name, italMarker, "")
    local words = mw.text.split(name, " ", true)
    -- default output is the name with internal italics removed; this is set
    -- only after the removal so that no branch below can return stale markup
    local result = name
    if #words == 4 and cTerms3[words[3]] then
        -- the third word of a four word name is a connecting term
        -- ensure the connecting term isn't italicized
        words[3] = normalOpen .. cTerms3[words[3]] .. normalClose
        if abbreviated then
            words[1] = l.abbreviate(words[1])
            words[2] = l.abbreviate(words[2])
        end
        result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
    elseif #words == 3 and cTerms2[words[2]] then
        -- the second word of a three word name is a connecting term
        -- ensure the connecting term isn't italicized
        words[2] = normalOpen .. cTerms2[words[2]] .. normalClose
        if abbreviated then
            words[1] = l.abbreviate(words[1])
        end
        result = words[1] .. " " .. words[2] .. " " .. words[3]
    elseif abbreviated and #words > 1 then
        -- not a name as above; abbreviate every word except the last
        local parts = {}
        for i = 1, #words - 1 do
            parts[i] = l.abbreviate(words[i])
        end
        parts[#words] = words[#words]
        result = table.concat(parts, " ")
    end

    -- deal with any hybrid symbol as it should not be italicized
    result = string.gsub(result, "×", normalOpen .. "×" .. normalClose)
    -- deal with any parentheses as they should not be italicized
    if dab then
        -- one span from "(" to ")" so the parenthesized text is also unitalicized
        result = string.gsub(result, "%(", normalOpen .. "(")
        result = string.gsub(result, "%)", ")" .. normalClose)
    else
        -- only the parentheses themselves are unitalicized
        result = string.gsub(result, "%(", normalOpen .. "(" .. normalClose)
        result = string.gsub(result, "%)", normalOpen .. ")" .. normalClose)
    end
    -- any question marks surrounded by spans can have the spans joined
    result = string.gsub(result, '</span>%?<span style="font%-style:normal;">', '?')

    -- add outside markup
    if linked then
        if result ~= name then
            -- displayed text differs from the link target, so pipe the link
            result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
        else
            result = italMarker .. "[[" .. name .. "]]" .. italMarker
        end
    else
        result = italMarker .. result .. italMarker
    end
    return result
end

--[[=========================================================================
Utility function used by other modules to check if a connecting term is
present in a name. The name is read from the first unnamed argument in
frame.args (an empty string is used if it is absent) and is assumed to be
plain text. The check itself is done by p.hasConnectingTerm().
=============================================================================]]
function p.hasCT(frame)
    local name = frame.args[1]
    if not name then
        name = ''
    end
    return p.hasConnectingTerm(name)
end
    
-- Returns true if name, split on single spaces, is a four-word name whose
-- third word is a key of cTerms3 or a three-word name whose second word is a
-- key of cTerms2; returns false otherwise.
function p.hasConnectingTerm(name)
	local parts = mw.text.split(name, " ", true)
	local count = #parts
	if count == 4 then
		return cTerms3[parts[3]] ~= nil
	end
	if count == 3 then
		return cTerms2[parts[2]] ~= nil
	end
	return false
end
	
return p -- export the table of public functions
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy