Module:IPAc2-mh: Difference between revisions

Content deleted Content added
Updated some comments near the top. This update mirrors that at Wiktionary.
Uncaching the library functions, per User:Erutuon's performance analysis.
Line 11:
 
local export = {}
 
-- Cached functions, to help improve performance at Wiktionary.
local math_max = math.max
local math_min = math.min
local mw_text_gsplit = mw.text.gsplit
local mw_text_split = mw.text.split
local mw_text_trim = mw.text.trim
local mw_ustring_gsub = mw.ustring.gsub
local mw_ustring_lower = mw.ustring.lower
local string_byte = string.byte
local string_find = string.find
local string_gmatch = string.gmatch
local string_gsub = string.gsub
local string_lower = string.lower
local string_sub = string.sub
local table_concat = table.concat
local table_remove = table.remove
 
local ASYLL = "̯"
Line 74 ⟶ 57:
return ""
else
local atLeft = string_bytestring.byte(text) == 32
local atRight = string_bytestring.byte(text, -1) == 32
if atLeft then
if atRight then
return string_substring.sub(text, 2, -2)
else
return string_substring.sub(text, 2)
end
elseif atRight then
return string_substring.sub(text, 1, -2)
else
return text
Line 92 ⟶ 75:
local function parseBoolean(text)
if type(text) == "string" then
text = string_gsubstring.gsub(text, "[^0-9A-Za-z]", "")
if text ~= "" and text ~= "0" and string_lowerstring.lower(text) ~= "false" then
return true
end
Line 103 ⟶ 86:
chars = chars or {}
local index = 1
for ch in string_gmatchstring.gmatch(text, pattern or UTF8_CHAR) do
chars[index] = ch
index = index + 1
Line 109 ⟶ 92:
if index <= #chars then
if shorten then
table_removetable.remove(chars, index)
else
repeat
Line 121 ⟶ 104:
 
-- Runs the same string.gsub substitution over `text` twice in a row.
-- The second pass operates on the output of the first, so it can pick up
-- matches that only exist after the first pass has rewritten the string
-- (presumably the reason for the double pass: overlapping matches).
-- @param text     string to transform
-- @param pattern  Lua pattern, used unchanged for both passes
-- @param subst    replacement (string, table or function), as for string.gsub
-- @return the transformed string, followed by the number of substitutions
--         made by the second pass only (the first pass's count is dropped
--         because the inner call is not the last argument).
local function string_gsub2(text, pattern, subst)
	return string.gsub(string.gsub(text, pattern, subst), pattern, subst)
end
 
Line 147 ⟶ 130:
local function ZTBL(text, sep)
local tbl = {}
for key in mw_text_gsplitmw.text.gsplit(text, sep or " ") do
tbl[key] = true
end
Line 162 ⟶ 145:
local outSeq = {}
code = mw_ustring_gsubmw.ustring.gsub(code, "%s+", " ")
code = string_lowerstring.lower(code)
for text in mw_text_gsplitmw.text.gsplit(code, " *,[ ,]*") do
text = fastTrim(text)
if text ~= "" then
local temp = string_gsubstring.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "")
if temp ~= "" then
error("'"..code.."' contains unsupported characters: "..temp)
Line 178 ⟶ 161:
["y"] = "0", ["h"] = "0h", ["w"] = "0w"
}
text = string_gsubstring.gsub(text, "_*([hwy])_+", PARSE_PSEUDO_GLIDE)
text = string_gsubstring.gsub(text, "_+([hwy])", PARSE_PSEUDO_GLIDE)
if string_findstring.find(text, "_") then
error("contains misplaced underscores: "..code)
end
-- a plain {i} protected from dialect-specific reflexes
text = string_gsubstring.gsub(text, "'i", "I")
-- "yi'y" and "'yiy" sequences
text = string_gsubstring.gsub(text, "('?)yi('*)y", function(aposA, aposB)
if aposA ~= "" then
-- "dwelling upon" i
Line 222 ⟶ 205:
["0w"] = "_W"
}
text = string_gsubstring.gsub(text, "[klmnr0]g?[hw]?", PARSE_C_CH_CW)
if string_findstring.find(text, "g") then
error("contains g that is not part of ng: "..code)
end
Line 245 ⟶ 228:
["'"] = ""
}
text = string_gsubstring.gsub(text, ".", PARSE_REMAINING)
-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
-- but allow VC, CV at affix boundaries
-- where a vowel may link to another morpheme's consonant.
temp = string_gsubstring.gsub(text, "[%s%-]+", "")
if string_findstring.find(temp, "_..[jGw]") or
string_findstring.find(temp, ".[jGw]_.")
then
error("pseudo-glides may not neighbor a consonant")
end
if string_findstring.find(temp, VI.."_."..VI) then
error("pseudo-glides may only be at the beginning or end"..code)
end
if string_findstring.find(temp, VI..VI) then
error("vowels must be separated by a consonant: "..code)
end
if string_findstring.find(temp, ".[jGw].[jGw].[jGw]") then
error("each consonant cluster is limited to two: "..code)
end
if string_findstring.find(temp, ".[jGw].[jGw]$") then
error("may not end with a consonant cluster: "..code)
end
string_gsubstring.gsub(temp, "^(.[jGw])(.[jGw])", function(consonX, consonY)
if consonX ~= consonY then
error("may only begin with single or geminated consonant: "
Line 350 ⟶ 333:
local version = args and args.version
local map = BENDER_MAPS[
type(version) == "string" and string_lowerstring.lower(version) or ""
] or BENDER_DEFAULT
local outSeq = {}
for _, text in pairs(inSeq) do
text = string_gsubstring.gsub(text, ".[jGw]?", map)
addUnique(outSeq, text)
end
Line 372 ⟶ 355:
 
-- Rewrites `text` by passing every character, together with an optional
-- directly following CEDILLA or MACRON, through the TO_MOD replacement.
-- CEDILLA, MACRON and TO_MOD are defined elsewhere in this module.
-- mw.ustring.gsub is used so that "." matches a whole Unicode character
-- rather than a single byte.
-- @param text  string to convert
-- @return the converted string only (the substitution count is discarded)
local function toMOD(text)
	text = mw.ustring.gsub(text, ".["..CEDILLA..MACRON.."]?", TO_MOD)
	return text
end
Line 412 ⟶ 395:
local outSeq = {}
for _, text in pairs(inSeq) do
text = string_gsubstring.gsub(text, ".[jGw]?", PHONEMIC_MAP)
addUnique(outSeq, text)
end
Line 452 ⟶ 435:
-- Looks up an entry of the VOWEL table in its F2_FRONT column.
-- The row index is the largest of 2, F1[a] and (when `b` is given) F1[b],
-- so the row never falls below 2.
-- VOWEL, F1 and F2_FRONT are defined elsewhere in this module.
-- @param a  key into F1
-- @param b  optional second key into F1
-- @return VOWEL[row][F2_FRONT] for the row computed as described above
local function maxF1(a, b)
	if b then
		return VOWEL[math.max(2, F1[a], F1[b])][F2_FRONT]
	else
		return VOWEL[math.max(2, F1[a])][F2_FRONT]
	end
end
Line 461 ⟶ 444:
-- Morphemes can begin with geminated consonants, but spoken words cannot.
text = string_gsubstring.gsub(text, "^(.[jGw])( *)%1( *)("..VI..")",
function(conson, _, __, vowel)
if conson == "hG" then
Line 482 ⟶ 465:
-- To block this in the template argument, use "'i" instead of "i".
text = " "..text
text = string_gsubstring.gsub(text,
"([ jGw])( *)(h[jw])( *)i( *)(h[jw])( *)("..VI..")",
function(nonVowel, _, consonX, __, ___, consonY, ____, vowel)
Line 506 ⟶ 489:
end
)
text = string_substring.sub(text, 2)
-- Restore protected {i}, we won't be checking for it anymore.
text = string_gsubstring.gsub(text, "I", "i")
return text
Line 584 ⟶ 567:
["l"] = { ["j"] = "L" }
}
for primary in mw_text_gsplitmw.text.gsplit("ptkmnNrl", "") do
local map2 = map[primary]
if not map2 then
Line 687 ⟶ 670:
map["Hj"] = map["Hj"] or map["i"]..map["^"]
local key
for primary in mw_text_gsplitmw.text.gsplit("pbBtdTDSZszkgmnJNrRlL_", "") do
for secondary in mw_text_gsplitmw.text.gsplit("jGw", "") do
key = primary..secondary
map[key] = map[key] or (map[primary]..map[secondary])
end
end
for vowel in mw_text_gsplitmw.text.gsplit(V_, "") do
key = vowel.."@"
map[key] = map[key] or (map[vowel]..map["@"])
Line 712 ⟶ 695:
-- and no pseudo-glide, display phrase up to five times
-- with each of the different pseudo-glides and possible vowel reflexes.
if IS_VOWEL[string_substring.sub(text, 1, 1)] then
text = "_j"..code
toPhoneticRemainder(text, config, false, rightFlag)
Line 730 ⟶ 713:
return
end
if IS_VOWEL[string_substring.sub(text, -1)] then
text = code.."_j"
toPhoneticRemainder(text, config, leftFlag, false)
Line 782 ⟶ 765:
end
toPhoneticRemainder(code, config)
addUnique(outSeq, table_concattable.concat(subSeq, " ~ "))
config.outSeq = outSeq
config.initialJ = initialJ
Line 793 ⟶ 776:
if not diphthongs then
-- {ww} always causes the second glide to surface.
text = string_gsubstring.gsub(text, "([aEei])( *)hw( *)hw", "%1%2hw%1@%3Hw")
end
text = string_gsubstring.gsub(text, "([aEei])( *)hG( *.[jGw])", "%1%2hG%1@%3")
text = string_gsubstring.gsub(text, "(.[jGw])( *)hG( *)([aEei])", "%1%4@%2hG%3%4")
text = string_gsubstring.gsub(text, "([aEei])( *)h(.)( *.[jGw])", "%1%2h%3%1@%4")
text = string_gsubstring.gsub(text, "(.[jGw])( *)h(. *)([aEei])", "%1%4@%2h%3%4")
text = string_gsubstring.gsub(text, "(.[jGw])( *[yY].)", "%1i@%2")
-- Preserve these exceptionally stable clusters.
text = string_gsubstring.gsub(text, "l([jG] *)tG", "l%1|tG")
-- Unstable consonant clusters trigger epenthesis.
-- Liquids before coronal obstruents.
text = string_gsubstring.gsub(text, "([rl].)( *)t", "%1v%2t")
-- Nasals and liquids after coronal obstruents.
text = string_gsubstring.gsub(text, "t(.)( *[nrl])", "t%1v%2")
-- Heterorganic clusters.
-- Labial consonants neighboring coronal or dorsal consonants.
text = string_gsubstring.gsub(text, "([pm].)( *[tnrlkN])", "%1v%2")
-- Coronal consonants neighboring labial or dorsal consonants.
text = string_gsubstring.gsub(text, "([tnrl].)( *[pmkN])", "%1v%2")
-- Dorsal consonants neighboring labial or coronal consonants.
text = string_gsubstring.gsub(text, "([kN].)( *[pmtnrl])", "%1v%2")
-- Organic speech involves certain consonant cluster assimilations.
Line 827 ⟶ 810:
-- Forward assimilation of rounded consonants.
-- There is no rounded coronal obstruent.
text = string_gsubstring.gsub(text, "(w *[^t])[jG]", "%1w")
-- Backward assimilation of remaining secondary articulations.
text = string_gsubstring.gsub(text, "[jGw]( *.)([jGw])", "%2%1%2")
-- Backward nasal assimilation of primary articulations.
text = string_gsubstring.gsub(text, "[pkrl](. *)([mnN])", "%2%1%2")
-- No longer need to protect exceptionally stable consonant clusters.
text = string_gsubstring.gsub(text, "|", "")
-- Give a vowel height to all epenthetic vowels that still lack one.
Line 846 ⟶ 829:
-- Tag all vowels for next set of operations.
text = string_gsubstring.gsub(text, "([aEei])", "/%1")
-- There is no variation in the surface realizations of vowels
Line 885 ⟶ 868:
local f2
if flag then
f2 = math_maxmath.max(f2L, f2R)
else
f2 = math_minmath.min(f2L, f2R)
end
return (
Line 895 ⟶ 878:
)
end
text = string_gsubstring.gsub(text, "(_)([jGw])( *)/("..V..")(@?)( *)(.)([jGw])",
function(a, b, c, d, e, f, g, h)
return subst(a, b, c, d, e, f, g, h, leftFlag)
end
)
text = string_gsubstring.gsub(text, "(.)([jGw])( *)/("..V..")(@?)( *)(_)([jGw])",
function(a, b, c, d, e, f, g, h)
return subst(a, b, c, d, e, f, g, h, rightFlag)
Line 920 ⟶ 903:
-- Exceptionally for the single word "rej".
text = string_gsubstring.gsub(text, "^(rG *)([V7])( *tj)$",
function(prefix, vowel, suffix)
return prefix..FRONT_VOWEL[vowel]..suffix
Line 928 ⟶ 911:
-- Vowels always claim the secondary articulation
-- of a neighboring back unrounded glide.
text = string_gsubstring.gsub(text, "(hG *)/([aEei])", function(prefix, vowel)
return prefix..BACK_VOWEL[vowel]
end)
text = string_gsubstring.gsub(text, "/([aEei])(@? *hG)", function(vowel, suffix)
return BACK_VOWEL[vowel]..suffix
end)
Line 937 ⟶ 920:
-- Unless already claimed, epenthetic vowels after a glide
-- always claim the secondary articulation to the left.
text = string_gsubstring.gsub(text, "([hH])(.)( *)/([aEei])@",
function(primaryL, secondaryL, _, vowel)
return (
Line 948 ⟶ 931:
-- Unless already claimed, vowels before a glide
-- always claim the secondary articulation to the right.
text = string_gsubstring.gsub(text, "/([aEei])(@?)( *[hHyY])(.)",
function(vowel, epenth, primaryR, secondaryR)
return (
Line 959 ⟶ 942:
-- For now, unless already claimed, vowels before a rounded consonant
-- claim the secondary articulation to the right.
text = string_gsubstring.gsub(text, "/([aEei])(@? *.w)", function(vowel, suffix)
return ROUND_VOWEL[vowel]..suffix
end)
Line 965 ⟶ 948:
-- For now, unless already claimed, remaining vowels
-- claim the secondary articulation to the left.
text = string_gsubstring.gsub(text, "([jGw])( *)/([aEei])",
function(secondaryL, _, vowel)
return secondaryL.._..VOWEL[F1[vowel]][F2[secondaryL]]
Line 976 ⟶ 959:
return prefix..VOWEL[F1[vowel]][F2[secondaryR]]..infix..secondaryR
end
text = string_gsubstring.gsub(text, "([yY]. *)([aEei])(@? *.)([jGw])", subst)
-- Change certain vowels in a special environment from round to front.
Line 984 ⟶ 967:
end
)
text = string_gsubstring.gsub(text, "(hj *)([Oou])( *)(.w)( *)("..V..")",
function(prefix, vowelL, _, conson, __, vowelR)
if conson ~= "hw" or F1[vowelL] ~= F1[vowelR] then
Line 991 ⟶ 974:
end
)
text = string_gsubstring.gsub(text, "(hj *)([Oou])( *.w *.w)",
function(prefix, vowel, suffix)
return prefix..FRONT_VOWEL[vowel]..suffix
end
)
text = string_gsubstring.gsub(text, "(a@? *hj *)Q( *.w *"..V..")", "%1a%2")
text = string_gsubstring.gsub(text, "(a@? *hj *)Q( *.w *.w)", "%1a%2")
-- Tag certain glide-vowel-non-glide sequences for special reflexes.
text = string_gsubstring.gsub(text, "(H[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
text = string_gsubstring.gsub(text, "^ *(h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
text = string_gsubstring.gsub(text, "(@ *h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
text = string_gsubstring.gsub(text,
"([EeiAV7MOou]@? *h[jw] *)([aAQ] *[ptkmnNrl])", "%1/%2")
text = string_gsubstring.gsub(text,
"(hj *[aEei]@? *hw *)("..V.." *[ptkmnNrl])", "%1/%2")
-- Untag certain sequences, exempting them from special reflexes.
text = string_gsubstring.gsub(text, "(hj *)/([aEei] *[knNrl]w)", "%1%2")
-- Special reflexes.
text = string_gsubstring.gsub(text, "([jw])( *)/("..V..")( *)(.)([jGw])",
function(secondaryL, _, vowel, __, primaryR, secondaryR)
return secondaryL.._..
Line 1,024 ⟶ 1,007:
-- Temporarily cancel epenthetic {i} neighboring {yi'y}.
text = string_gsubstring.gsub(text, "i@( *yj)", "%1")
-- {yi'y} neighboring {i} may now be demoted to {y}.
text = string_gsubstring.gsub(text, "([iMu]@? *)yj", "%1hj")
text = string_gsubstring.gsub(text, "yj( *[iMu])", "hj%1")
-- {'yiy} may now be demoted everywhere.
text = string_gsubstring.gsub(text, "(i@ *)Yj", "%1hjihj")
text = string_gsubstring.gsub(text, "Yj", "hjihji@hj")
-- For the purposes of this template,
-- surface all glides pronounced in isolation.
text = string_gsubstring.gsub(text, "^ *h(.) *$", "H%1")
if not diphthongs then
-- Opportunistically front this vowel.
text = string_gsubstring.gsub(text, "(hj *)A( *[kN]G *[kN]?G? *"..V..")", "%1a%2")
-- Surface certain glides.
text = string_gsubstring.gsub(text, "^ *h(w *[Oou])", "H%1")
text = string_gsubstring.gsub(text, "h(w *[aEeiAV7M])", "H%1")
text = string_gsubstring.gsub(text, "^ *h(j *[AV7MQOou])", "H%1")
text = string_gsubstring.gsub(text, "([ptkmnNrl]..@ *)h(w *[Oou])", "%1H%2")
text = string_gsubstring.gsub(text,
"([ptkmnNrl].).@( *)h(j *[AV7MQOou])", "%1%2H%3")
text = string_gsubstring.gsub(text, "([AV7MQOou]@? *)h(j *[AV7MQOou])", "%1H%2")
text = string_gsubstring.gsub(text, "([aEeiAV7M])(@? *)hw( *)([QOou])",
function(vowelL, infix, _, vowelR)
if F1[vowelL] > F1[vowelR] then
Line 1,056 ⟶ 1,039:
end
)
text = string_gsubstring.gsub(text, "([AV7MQOou])(@? *)hj( *)([aEei])",
function(vowelL, infix, _, vowelR)
if F1[vowelL] > F1[vowelR] then
Line 1,063 ⟶ 1,046:
end
)
text = string_gsubstring.gsub(text, "([aEei])(@? *)hj( *)([AV7MQOou])",
function(vowelL, infix, _, vowelR)
if F1[vowelL] < F1[vowelR] then
Line 1,070 ⟶ 1,053:
end
)
text = string_gsubstring.gsub(text, "("..V..")( *)h([jw]) *$",
function(vowel, _, secondary)
if F2[vowel] ~= F2[secondary] then
Line 1,080 ⟶ 1,063:
-- Protect word-final epenthetic vowels after non-glides
-- from the next operation.
text = string_gsubstring.gsub(text, "([ptkmnNrl][jGw]"..V..")(@ )", "%1/%2")
-- De-epenthesize vowels if they still neighbor unsurfaced glides.
text = string_gsubstring.gsub(text, "("..V..")@( *h.)", "%1%2")
text = string_gsubstring.gsub(text, "(h. *"..V..")@", "%1")
-- Adjust F1 of currently remaining epenthetic vowels.
Line 1,096 ⟶ 1,079:
)
text = string_gsubstring.gsub(text, "/", "")
end
-- Delete all remaining unsurfaced glides.
text = string_gsubstring.gsub(text, "h.", "")
-- Surface realization for {yi'y}.
text = string_gsubstring.gsub(text, "yj", "i^")
if not diphthongs then
Line 1,131 ⟶ 1,114:
-- Turn this surfaced glide into an epenthetic vowel.
text = string_gsubstring.gsub(text, "([ptkmnNrl]. *[aEei])%^( *"..V..")", "%1@%2")
end
if MERGED_VOWELS then
text = string_gsubstring.gsub(text, "[EO]", function(vowel)
return VOWEL[F1[vowel] + 1][F2[vowel]]
end)
Line 1,160 ⟶ 1,143:
index = index2
until index == 1
text = table_concattable.concat(chars, "")
end
-- Tweak remaining consonants, using offsets as a guide.
text = string_gsubstring.gsub(text, "()(.)([jGw])( *)([ptkmnNrl]?)([jGw]?)()",
function(
offsetL, primaryL, secondaryL, _, primaryR, secondaryR, offsetR
Line 1,263 ⟶ 1,246:
-- Elegantly connect long and epenthetic vowels across word gaps.
text = string_gsubstring.gsub(text, "(["..V_..":]): +", "%1 : ")
text = string_gsubstring.gsub(text, "("..V..") +%1([^%^])", "%1 :%2")
text = string_gsubstring.gsub(text, "("..V..") +%1$", "%1 :")
text = string_gsubstring.gsub(text, "("..V..")@ +%1", " %1 :")
text = string_gsubstring.gsub(text, "("..V.."@) +", " %1 ")
if W_OFF_GLIDES then
Line 1,277 ⟶ 1,260:
end
if false and PHONETIC_DETAILS then
text = string_gsubstring.gsub(text, "([pbm])(G *[aEei])(@?)",
function(primary, _, epenth)
if epenth == "" then
Line 1,285 ⟶ 1,268:
)
else
text = string_gsubstring.gsub(text, "([pbm])G( *[aEei])(@?)", subst)
end
text = string_gsubstring.gsub(text, "([kgnNrl])w( *[aEeiAV7M])(@?)", subst)
-- Remove [w] off-glides after certain consonants
-- when they occur after rounded vowels.
text = string_gsubstring.gsub(text, "([QOou] *[nrl]? *[nrl])Hw", "%1w")
text = string_gsubstring.gsub(text, "([QOou] *[kgN]? *N)Hw( *M)", "%1w%2")
end
Line 1,298 ⟶ 1,281:
if IMPLIED_SECONDARIES then
if PARENTHETICAL_EPENTHESIS then
text = string_gsubstring.gsub(text, "(.@)", "/%1")
if diphthongs then
text = string_gsubstring.gsub(text, "(.=.@)", "/%1")
end
end
if PARENTHETICAL_EPENTHESIS then
text = string_gsubstring.gsub(text, "/", "")
end
end
Line 1,311 ⟶ 1,294:
if PARENTHETICAL_EPENTHESIS then
if not diphthongs then
text = string_gsubstring.gsub(text, "(.)@("..V..")", "%1^%2")
end
text = string_gsubstring.gsub(text, "(.)@", "(%1)")
text = string_gsubstring.gsub(text, "%)(=?)%(", "%1")
end
-- Convert remaining word gaps to liaison.
text = string_gsubstring.gsub(text, "^ *", "")
text = string_gsubstring.gsub(text, " *$", "")
text = string_gsubstring.gsub(text, " +", false and "_" or "")
text = string_gsubstring.gsub(text, ".[jGw@%^]?", PHONETIC_IPA)
addUnique(outSeq, text)
Line 1,338 ⟶ 1,321:
-- For other values, list both possible dialect reflexes where applicable.
local dialect = args and args.dialect and
mw_ustring_lowermw.ustring.lower(mw_text_trimmw.text.trim(args.dialect)) or ""
if dialect == "rālik" then
dialect = "ralik"
Line 1,351 ⟶ 1,334:
-- Real-world pronunciation said to vary by sociological factors,
-- but all realizations may occur in free variation.
local modeJ = splitChars(args and args.J and string_lowerstring.lower(args.J) or "tst")
local initialJ = PHONETIC_ARG_J[modeJ[1] or ""] or "t"
local medialJ = PHONETIC_ARG_J[modeJ[2] or ""] or "s"
Line 1,380 ⟶ 1,363:
for _, str in pairs(inSeq) do
str = string_gsubstring.gsub(str, S, " ")
str = string_gsubstring.gsub(str, "^ *", "")
str = string_gsubstring.gsub(str, " *$", "")
local isRalik = dialect == "ralik"
if isRalik or dialect == "ratak" then
Line 1,411 ⟶ 1,394:
 
-- Exported entry point: parses the first frame argument, converts the
-- parsed sequence with toBender (which also receives all frame arguments),
-- and joins the resulting items with ", ".
-- @param frame  object providing an `args` table; args[1] is the input code
-- @return the comma-separated result string
function export.bender(frame)
	return table.concat(toBender(parse(frame.args[1], frame.args)), ", ")
end
 
Line 1,419 ⟶ 1,402:
 
-- Exported entry point: parses the first frame argument and joins the
-- items of the parsed sequence with ", " without further conversion.
-- @param frame  object providing an `args` table; args[1] is the input code
-- @return the comma-separated result string
function export.parse(frame)
	return table.concat(parse(frame.args[1]), ", ")
end
 
-- Exported entry point: parses the first frame argument, converts the
-- parsed sequence with toPhonemic, and joins the resulting items with ", ".
-- @param frame  object providing an `args` table; args[1] is the input code
-- @return the comma-separated result string
function export.phonemic(frame)
	return table.concat(toPhonemic(parse(frame.args[1])), ", ")
end
 
-- Exported entry point: parses the first frame argument, converts the
-- parsed sequence with toPhonetic (which also receives all frame arguments
-- as its options), and joins the resulting items with ", ".
-- @param frame  object providing an `args` table; args[1] is the input code
-- @return the comma-separated result string
function export.phonetic(frame)
	return table.concat(toPhonetic(parse(frame.args[1]), frame.args), ", ")
end