User:Opencooper/showKanji-dev.js: Difference between revisions

Content deleted Content added
fix
comment
 
(27 intermediate revisions by the same user not shown)
Line 51:
https://en.wikipedia.org/wiki/Ninjō - No interlanguage, but wiktionary
https://en.wikipedia.org/wiki/Seiza - Interlanguage failed, but wiktionary
https://en.wikipedia.org/wiki/Epsomite - No interlanguage, but wiktionary "see" Table
https://en.wikipedia.org/wiki/Bakayaro!_I%27m_Plenty_Mad - only part of parenthesis extracted
 
Line 66 ⟶ 67:
https://en.wikipedia.org/wiki/Magnum_Collection_1999_%22Dear%22
https://en.wikipedia.org/wiki/EC_Comics
https://en.wikipedia.org/wiki/CJK_characters
https://en.wikipedia.org/wiki/My_Girlfriend_is_Shobitch
https://en.wikipedia.org/wiki/Immaculate_Conception_Cathedral,_Nagasaki - partial match
https://en.wikipedia.org/wiki/USA-224 - または
https://en.wikipedia.org/wiki/Milk - bad match
https://en.wikipedia.org/wiki/Not_invented_here
*/
 
Line 143 ⟶ 150:
// Need to add hyphen escaped since it has special behavior in regex classes
// TODO: Just escape $kanji early instead, like we did before?
if (/-/.test(kanji)) { kanjiStripped += "\\-"; }
var kanjiAuxillary = kanjiStripped.replace(/\w/g, "");
 
kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'" ]+$/;
kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxillary + "]+$");
kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxillary + "]+$");
Line 164 ⟶ 171:
// Add kanji to regex to make sure we're not getting the reading of some
// other term
kanjiRegexes.leadUnspaced = new RegExp(kanjiEscaped + ".[^(\n)]*?\\(" + leadReBase);
kanjiRegexes.lead = new RegExp(kanjiSpaced + ".[^(\n)]*?\\(" + leadReBase, "i"); // brittle
}
 
Line 207 ⟶ 214:
function parseKanaClaim(response) {
var kana;
ifvar (response.claims.P1814)properties = {
// name in kana title: "P1476",
nativeLabel: "P1705",
kana = response.claims.P1814[0].mainsnak.datavalue.value;
officialName: "P1448",
} else if (response.claims.P1476 && response.claims.P1476[0].qualifiers
nameInNativeLanguage: "P1559"
&& response.claims.P1476[0].qualifiers.P1814) {
// title w/ name in kana };
var nameInKana = "P1814";
kana = response.claims.P1476[0].qualifiers.P1814[0].datavalue.value;
} else if (response.claims.P1705 && response.claims.P1705[0].qualifiers
// Try getting nameInKana as a qualifier to some properties
&& response.claims.P1705[0].qualifiers.P1814) {
for (var prop in properties) {
// native label w/ name in kana
var pnum = properties[prop];
kana = response.claims.P1705[0].qualifiers.P1814[0].datavalue.value;
} else {
if (response.claims[pnum]) {
var kanji &&= response.claims.P1705[pnum][0].qualifiersmainsnak.P1814) {datavalue.value.text;
if (kanji.replace(/ /g, "") == wikidataKanji.replace(/ /g, "")
} else if (response.claims.P1476 && response.claims.P1476[pnum][0].qualifiers
&& response.claims.P1476[pnum][0].qualifiers.P1814[nameInKana]) {
kana = response.claims.P1476[pnum][0].qualifiers.P1814[nameInKana][0].datavalue.value;
break;
}
}
}
 
// Try getting nameInKana as a general claim
if (!kana && response.claims[nameInKana]) {
prop = "nameInKana";
kana = response.claims.P1814[nameInKana][0].mainsnak.datavalue.value;
}
// We couldn't find nameInKana
if (!kana) {
getInterlanguage();
return;
Line 226 ⟶ 252:
displayKana(kana);
$("#kanjiInfo").addClass("kanjiInfo-wikidata");
$("#kanjiInfo").addClass("kanjiInfo-wikidata-" + prop);
}
 
Line 242 ⟶ 269:
success: function(response) {
var pageId = mw.config.get( 'wgArticleId' );
var langlinkspage = response.query.pages[pageId].langlinks;
var langlinks = page ? page.langlinks : undefined;
var jaLabel;
if (langlinks) {
Line 307 ⟶ 335:
kana = kana.replace(/[・、 ]$/, "");
 
// Abort if our reading is only katakana (for non-Latin) or Latin
if ((!kanjiRegexes.latinOnly.test(wikidataKanji) && /^[ァ-ヴー]+$/kanjiRegexes.katakanaOnly.test(kana))
|| /^\w+$/kanjiRegexes.latinOnly.test(kana)) {
console.log("showKanji-dev.js: throwing away reading: " + kana);
getWiktionary();
Line 387 ⟶ 415:
// Wiktionary adds readings as furigana
var headword = parsed.find(".headword:lang(ja)").first();
var seeTable = parsed.find(".Jpan ruby").first();
var kanji = "";
var kana = "";
if (headword.length) {
// Wiktionary already binds their kana, so we have to undo the process to get
// the constituent parts, at least with the current markup
var childNodes = headword[0].childNodes;
for (let i = 0;var ichildNodes <= headword[0].childNodes.length; i++) {
for (let i = 0; i < childNodes.length; i++) {
if (childNodes[i].nodeName == "RUBY") {
var ruby = $ if (childNodes[i]).nodeName //== convert back to JQuery for"RUBY") convenience{
var ruby = $(childNodes[i]); // convert back to JQuery for convenience
ruby.children("rp").remove();
kana += ruby.children("rtrp").detach().textremove();
kanjikana += ruby.children("rt").detach().text();
kanji += ruby.text();
} else if (childNodes[i].nodeType == 3) { // "#text"
kanji += childNodes[i].nodeValue;
kana += childNodes[i].nodeValue;
}
}
if (kanji != wikidataKanji) { return; }
} else if (seeTable.length) {
kanji = seeTable.children("rb").text();
rubykana = seeTable.children("rprt").removetext();
} else {
return;
}
if (kanji != wikidataKanji) { return; }
 
if (kana) {