Content deleted Content added
Opencooper (talk | contribs) update deprecated |
Opencooper (talk | contribs) comment |
||
(48 intermediate revisions by the same user not shown) | |||
Line 2:
// It then calls another script, bindKana.js, to clean up the display of ruby
// For configuration, please see the documentation
// TODO: Reject if any base/reading has too low/high of a ratio.
// TODO: Reject if the parentheses are unbalanced
// TODO: <rb> is not actually in the WHATWG standard...
// License: CC0
Line 47 ⟶ 51:
https://en.wikipedia.org/wiki/Ninjō - No interlanguage, but wiktionary
https://en.wikipedia.org/wiki/Seiza - Interlanguage failed, but wiktionary
https://en.wikipedia.org/wiki/Epsomite - No interlanguage, but wiktionary "see" Table
https://en.wikipedia.org/wiki/Bakayaro!_I%27m_Plenty_Mad - only part of parenthesis extracted
Line 57 ⟶ 62:
https://en.wikipedia.org/wiki/Love_Live!_The_School_Idol_Movie - interpunct in reading
https://en.wikipedia.org/wiki/Lupin_the_Third:_The_Woman_Called_Fujiko_Mine - hyphen in kanji
https://en.wikipedia.org/wiki/Sunscreen
https://en.wikipedia.org/wiki/Flag_of_China
https://en.wikipedia.org/wiki/W3m
https://en.wikipedia.org/wiki/Magnum_Collection_1999_%22Dear%22
https://en.wikipedia.org/wiki/EC_Comics
https://en.wikipedia.org/wiki/CJK_characters
https://en.wikipedia.org/wiki/My_Girlfriend_is_Shobitch
https://en.wikipedia.org/wiki/Immaculate_Conception_Cathedral,_Nagasaki - partial match
https://en.wikipedia.org/wiki/USA-224 - または
https://en.wikipedia.org/wiki/Milk - bad match
https://en.wikipedia.org/wiki/Not_invented_here
*/
Line 134 ⟶ 150:
// The hyphen needs to be added escaped, since it has special behavior in regex character classes
// TODO: Just escape $kanji early instead, like we did before?
var kanjiAuxillary = kanjiStripped.replace(/\w/g, "");
kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxillary + "]+$");
kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxillary + "]+$");
Line 147 ⟶ 163:
var leadReBase = "([ぁ-ゔァ-ヴー" + kanjiStripped + "]+)";
var kanjiSpaced = kanjiEscaped.replace(/ /g, " ?");
kanjiSpaced = kanjiSpaced.replace(reKanjiKanaLatin, "$1 ?");
Line 155 ⟶ 171:
// Add kanji to regex to make sure we're not getting the reading of some
// other term
kanjiRegexes.leadUnspaced = new RegExp(kanjiEscaped + "
kanjiRegexes.lead = new RegExp(kanjiSpaced + "
}
Line 198 ⟶ 214:
function parseKanaClaim(response) {
var kana;
nativeLabel: "P1705",
kana = response.claims.P1814[0].mainsnak.datavalue.value;▼
officialName: "P1448",
} else if (response.claims.P1476 && response.claims.P1476[0].qualifiers▼
nameInNativeLanguage: "P1559"
&& response.claims.P1476[0].qualifiers.P1814) {▼
var nameInKana = "P1814";
kana = response.claims.P1476[0].qualifiers.P1814[0].datavalue.value;▼
// Try getting nameInKana as a qualifier to some properties
&& response.claims.P1705[0].qualifiers.P1814) {▼
for (var prop in properties) {
var pnum = properties[prop];
if (response.claims[pnum]) {
if (kanji.replace(/ /g, "") == wikidataKanji.replace(/ /g, "")
break;
}
}
}
// Try getting nameInKana as a general claim
if (!kana && response.claims[nameInKana]) {
prop = "nameInKana";
}
// We couldn't find nameInKana
getInterlanguage();
return;
Line 217 ⟶ 252:
displayKana(kana);
$("#kanjiInfo").addClass("kanjiInfo-wikidata");
$("#kanjiInfo").addClass("kanjiInfo-wikidata-" + prop);
}
Line 233 ⟶ 269:
success: function(response) {
var pageId = mw.config.get( 'wgArticleId' );
var
var langlinks = page ? page.langlinks : undefined;
var jaLabel;
if (langlinks) {
Line 298 ⟶ 335:
kana = kana.replace(/[・、 ]$/, "");
// Abort if our reading is only katakana (for non-Latin) or
if (
console.log("showKanji-dev.js: throwing away reading: " + kana);
getWiktionary();
return;
Line 373 ⟶ 412:
var html = response.parse.text["*"];
var parsed = $($.parseHTML(html));
var headword = parsed.find(".headword:lang(ja)").first();
var seeTable = parsed.find(".Jpan ruby").first();
if (headword.text() != wikidataKanji) { return; }▼
▲ // Wiktionary tags its readings with the "form-of" class
var kanji = "";
▲ if (kana) {
if (headword.length) {
▲ if (!kanjiRegexes.kanaOnly.test(kana)) { return; }
// Wiktionary already binds its kana, so we have to undo the process to get
// the constituent parts, at least with the current markup
var childNodes = headword[0].childNodes;
for (let i = 0; i < childNodes.length; i++) {
if (childNodes[i].nodeName == "RUBY") {
var ruby = $(childNodes[i]); // convert back to JQuery for convenience
ruby.children("rp").remove();
kana += ruby.children("rt").detach().text();
kanji += ruby.text();
} else if (childNodes[i].nodeType == 3) { // "#text"
kanji += childNodes[i].nodeValue;
kana += childNodes[i].nodeValue;
}
}
} else if (seeTable.length) {
kanji = seeTable.children("rb").text();
kana = seeTable.children("rt").text();
} else {
return;
}
if (kana) {
displayKana(kana);
$("#kanjiInfo").addClass("kanjiInfo-wiktionary");
// Extra stuff just for fun
var definition = headword.parent().siblings("ol").children("li").first().text();
definition = definition.split('\n', 1)[0];
definition = definition.replace(/\[[0-9]{1,2}\]/g, "");
$("#kanjiInfo").prop("title", definition);
}
|