Content deleted Content added
Opencooper (talk | contribs) better handling of Latin with katakana reading |
Opencooper (talk | contribs) comment |
||
(28 intermediate revisions by the same user not shown) | |||
Line 51:
https://en.wikipedia.org/wiki/Ninjō - No interlanguage, but wiktionary
https://en.wikipedia.org/wiki/Seiza - Interlanguage failed, but wiktionary
https://en.wikipedia.org/wiki/Epsomite - No interlanguage, but wiktionary "see" Table
https://en.wikipedia.org/wiki/Bakayaro!_I%27m_Plenty_Mad - only part of parenthesis extracted
Line 66 ⟶ 67:
https://en.wikipedia.org/wiki/Magnum_Collection_1999_%22Dear%22
https://en.wikipedia.org/wiki/EC_Comics
https://en.wikipedia.org/wiki/CJK_characters
https://en.wikipedia.org/wiki/My_Girlfriend_is_Shobitch
https://en.wikipedia.org/wiki/Immaculate_Conception_Cathedral,_Nagasaki - partial match
https://en.wikipedia.org/wiki/USA-224 - または
https://en.wikipedia.org/wiki/Milk - bad match
https://en.wikipedia.org/wiki/Not_invented_here
*/
Line 143 ⟶ 150:
// The hyphen needs to be added escaped, since an unescaped hyphen has special meaning inside regex character classes
// TODO: Just escape $kanji early instead, like we did before?
var kanjiAuxillary = kanjiStripped.replace(/\w/g, "");
kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxillary + "]+$");
kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxillary + "]+$");
Line 164 ⟶ 171:
// Add kanji to regex to make sure we're not getting the reading of some
// other term
kanjiRegexes.leadUnspaced = new RegExp(kanjiEscaped + "
kanjiRegexes.lead = new RegExp(kanjiSpaced + "
}
Line 207 ⟶ 214:
function parseKanaClaim(response) {
var kana;
nativeLabel: "P1705",
kana = response.claims.P1814[0].mainsnak.datavalue.value;▼
officialName: "P1448",
} else if (response.claims.P1476 && response.claims.P1476[0].qualifiers▼
nameInNativeLanguage: "P1559"
&& response.claims.P1476[0].qualifiers.P1814) {▼
var nameInKana = "P1814";
kana = response.claims.P1476[0].qualifiers.P1814[0].datavalue.value;▼
// Try getting nameInKana as a qualifier of one of these properties
&& response.claims.P1705[0].qualifiers.P1814) {▼
for (var prop in properties) {
var pnum = properties[prop];
if (response.claims[pnum]) {
if (kanji.replace(/ /g, "") == wikidataKanji.replace(/ /g, "")
break;
}
}
}
// Try getting nameInKana as a general claim
if (!kana && response.claims[nameInKana]) {
prop = "nameInKana";
}
// We couldn't find nameInKana
if (!kana) {
getInterlanguage();
return;
Line 226 ⟶ 252:
displayKana(kana);
$("#kanjiInfo").addClass("kanjiInfo-wikidata");
$("#kanjiInfo").addClass("kanjiInfo-wikidata-" + prop);
}
Line 242 ⟶ 269:
success: function(response) {
var pageId = mw.config.get( 'wgArticleId' );
var
var langlinks = page ? page.langlinks : undefined;
var jaLabel;
if (langlinks) {
Line 307 ⟶ 335:
kana = kana.replace(/[・、 ]$/, "");
// Abort if our reading is only katakana (for non-Latin kanji) or only Latin
if ((!kanjiRegexes.latinOnly.test(wikidataKanji) &&
||
console.log("showKanji-dev.js: throwing away reading: " + kana);
getWiktionary();
Line 387 ⟶ 415:
// Wiktionary adds readings as furigana
var headword = parsed.find(".headword:lang(ja)").first();
var seeTable = parsed.find(".Jpan ruby").first();
var kanji = "";
var kana = "";
if (headword.length) {
for (let i = 0; i < childNodes.length; i++) {
var ruby = $(childNodes[i]); // convert back to JQuery for convenience
ruby.children("rp").remove();▼
kanji += ruby.text();
} else if (childNodes[i].nodeType == 3) { // "#text" kanji += childNodes[i].nodeValue;
kana += childNodes[i].nodeValue;
}
}
} else if (seeTable.length) {
kanji = seeTable.children("rb").text();
} else {
return;
}
▲ if (kanji != wikidataKanji) { return; }
if (kana) {
|