User:Opencooper/showKanji-dev.js

This is an old revision of this page, as edited by Opencooper (talk | contribs) at 23:47, 30 December 2018 (add). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// This script shows, if found, the kanji and kana for an article
// For configuration, please see the documentation

/* Sample pages:
    https://en.wikipedia.org/wiki/Tamio_Kawachi - kana on wikidata
    https://en.wikipedia.org/wiki/A_Fantastic_Tale_of_Naruto - kanji from wikidata only
    https://en.wikipedia.org/wiki/What_a_Wonderful_World! - kana from wikidata only
    https://en.wikipedia.org/wiki/Asako_I_%26_II - from redirect
    https://en.wikipedia.org/wiki/Bokura_ga_Ita_(film) - interwiki to subsection
    https://ja.wikipedia.org/wiki/%E7%B4%AF - kana part of bolded title
    https://en.wikipedia.org/wiki/Bokutachi_no_Koukan_Nikki - kana not in first sentence
    https://ja.wikipedia.org/wiki/SCP%E8%B2%A1%E5%9B%A3 - bolded term w/ kana past first sentence
    https://en.wikipedia.org/wiki/Nuclear_fusion - different term w/ kana in lead
    https://en.wikipedia.org/wiki/20th_Century_Boys - overcapturing because title is subset
    https://en.wikipedia.org/wiki/Kanji_Furutachi - kanji only
    https://en.wikipedia.org/wiki/Anata_e - hiragana only
    https://en.wikipedia.org/wiki/Anatahan_(film) - katakana only
    https://en.wikipedia.org/wiki/A.LI.CE - latin only
    https://en.wikipedia.org/wiki/0.5_mm - numeric
    https://en.wikipedia.org/wiki/Comic_Magazine - exclamation point
    https://en.wikipedia.org/wiki/Flare_(film) - wave dash
    https://en.wikipedia.org/wiki/Dog%C3%97Police - multiplication sign
    https://en.wikipedia.org/wiki/Foreboding_(film) - spaces
    https://en.wikipedia.org/wiki/Age_12 - period in title
    https://en.wikipedia.org/wiki/After_the_Rain_(manga) - kanji + hiragana
    https://en.wikipedia.org/wiki/Afro_Tanaka - kanji + katakana
    https://en.wikipedia.org/wiki/Battle_Girl:_The_Living_Dead_in_Tokyo_Bay - katakana + latin
    https://en.wikipedia.org/wiki/Calling_You_(short_story_collection) - kanji + hiragana + latin
    https://en.wikipedia.org/wiki/Ashita_no_Joe - hiragana + katakana
    https://en.wikipedia.org/wiki/Arcadia_of_My_Youth - kanji + hiragana + katakana
*/

// Entry point. When a regular article is being read and it has a Wikidata
// item, reserves a spot in the page heading and requests the item's Japanese
// label from Wikidata; parseJaLabel handles the reply.
function setup() {
    // Only act on plain article views: not other actions, not non-article
    // pages, not URLs carrying an oldid value, and not the Main Page
    if (mw.config.get("wgAction") !== "view") {
        return;
    }
    if (!mw.config.get("wgIsArticle")) {
        return;
    }
    if (___location.search.split("oldid=")[1]) {
        return;
    }
    if (mw.config.get("wgPageName") === "Main_Page") {
        return;
    }

    // Without a Wikidata item we assume there are no 1:1 interlanguage
    // links; this also skips pages that merely link to a subsection of a
    // jawiki article
    if (wikidataId === null) {
        return;
    }

    // Insert the (still empty) container right away so that content arriving
    // later doesn't get pushed down by other elements
    $("#firstHeading").append("<div id='kanjiInfo' lang='ja'></div>");

    // Ask Wikidata for the Japanese label only
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
    var labelQuery = {
        action: "wbgetentities",
        ids: wikidataId,
        props: "labels",
        languages: "ja",
        format: "json",
        origin: "*"
    };
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: labelQuery,
        success: parseJaLabel
    });
}

// Success handler for the wbgetentities request made in setup().
// Shows the item's Japanese label, if it has one, and then requests a kana
// reading unless the label already consists of kana only.
function parseJaLabel(response) {
    var jaEntry = response.entities[wikidataId].labels.ja;
    var labelText = jQuery.isEmptyObject(jaEntry) ? undefined : jaEntry.value;

    // No Japanese label: nothing to show
    if (!labelText) {
        return;
    }
    displayKanji(labelText);

    // A label that is already pure kana needs no separate reading
    if (kanaOnlyRe.test(labelText)) {
        return;
    }
    requestKana();
}

// Puts the Japanese label into the #kanjiInfo container, wrapped in a <ruby>
// element, and tags the container with a class describing the script used.
//
// kanji: the label text to display (plain text, not markup)
function displayKanji(kanji) {
    var $info = $("#kanjiInfo");

    // The label comes from Wikidata, which anyone can edit, so it must never
    // be parsed as HTML: create the element first and set its text content
    $info.append($("<ruby>").text(kanji));

    // Add some classes so users can choose in their CSS not to display, for
    // example, katakana-only labels. Latin-only labels are hidden outright.
    if (latinOnlyRe.test(kanji)) {
        $info.addClass("kanjiInfo-latin-only");
        $info.css("display", "none");
    } else if (hiraganaOnlyRe.test(kanji)) {
        $info.addClass("kanjiInfo-hiragana-only");
    } else if (katakanaOnlyRe.test(kanji)) {
        $info.addClass("kanjiInfo-katakana-only");
    }
}

// Requests the claims of the current Wikidata item; parseKanaClaim receives
// the response and looks for a kana reading in it.
function requestKana() {
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims
    // All claims are fetched rather than one specific property, since the
    // kana may only be present as a qualifier on some other claim
    var claimsQuery = {
        action: "wbgetclaims",
        entity: wikidataId,
        format: "json",
        origin: "*"
    };
    var request = {
        url: "https://www.wikidata.org/w/api.php",
        data: claimsQuery,
        success: parseKanaClaim
    };
    $.ajax(request);
}

// Success handler for the wbgetclaims request made in requestKana().
// Displays the kana reading stored on Wikidata, or falls back to scraping
// jawiki (scrapeKana) when no usable reading is found.
//
// Sources, in priority order:
//   1. P1814 (name in kana) as a statement of its own
//   2. P1814 as a qualifier on the first P1476 (title) statement
//   3. P1814 as a qualifier on the first P1705 (native label) statement
//
// response: parsed JSON of the wbgetclaims call
function parseKanaClaim(response) {
    // A snak with snaktype "novalue"/"somevalue" has no datavalue, so its
    // absence must be tolerated instead of dereferenced
    function snakValue(snak) {
        return snak && snak.datavalue ? snak.datavalue.value : undefined;
    }

    // Kana qualifier of the first statement in the list, if there is one
    function kanaQualifier(statements) {
        var qualifiers = statements && statements[0] && statements[0].qualifiers;
        return snakValue(qualifiers && qualifiers.P1814 && qualifiers.P1814[0]);
    }

    // An error response carries no claims at all; treat it as "nothing found"
    var claims = (response && response.claims) || {};
    var kana = snakValue(claims.P1814 && claims.P1814[0] && claims.P1814[0].mainsnak)
               || kanaQualifier(claims.P1476)
               || kanaQualifier(claims.P1705);

    if (!kana) {
        scrapeKana();
        return;
    }

    displayKana(kana);
    $("#kanjiInfo rt").addClass("kanjiInfo-wikidata");
}

// Fallback used when Wikidata has no kana: if the page has a Japanese
// interlanguage link, requests the lead section of that jawiki article so
// parseJaLead can look for the reading in it.
function scrapeKana() {
    // No jawiki interlanguage link, nothing to scrape
    if (!$(".interwiki-ja").length) {
        return;
    }
    var jaTitle = getInterlanguageJa();

    // Wikitext of section 0 of the latest revision, following redirects
    // API docs: https://www.mediawiki.org/wiki/API:Revisions
    var leadQuery = {
        action: "query",
        prop: "revisions",
        format: "json",
        titles: jaTitle,
        redirects: "true",
        rvprop: "content",
        rvsection: "0",
        rvslots: "main",
        origin: "*"
    };
    $.ajax({
        url: "https://ja.wikipedia.org/w/api.php",
        data: leadQuery,
        success: parseJaLead
    });
}

// Extracts the jawiki page title from the Japanese interlanguage link.
//
// Returns the decoded title without any #anchor, or an empty string when the
// link has no href or the href has no "/wiki/" path.
function getInterlanguageJa() {
    var href = $(".interwiki-ja .interlanguage-link-target").attr("href") || "";
    var marker = "/wiki/";
    var start = href.indexOf(marker);
    if (start === -1) {
        return "";
    }

    // Keep everything after the FIRST "/wiki/": the title may itself contain
    // that sequence, which splitting on the marker would cut off
    var encodedTitle = href.slice(start + marker.length);

    // rm anchors; done before decoding, while "#" can only be the fragment
    // delimiter
    encodedTitle = encodedTitle.split("#")[0];

    return decodeURIComponent(encodedTitle);
}

// Success handler for the jawiki lead request made in scrapeKana().
// Looks in the first sentence of the lead for the displayed Japanese title
// followed by its kana reading and, if found, displays that reading.
//
// response: parsed JSON of the action=query&prop=revisions call
function parseJaLead(response) {
    var pages = response && response.query && response.query.pages;
    if (!pages) {
        return;
    }
    // Have to look the page up by its first key since the jawiki pageid is
    // unknown
    var page = pages[Object.keys(pages)[0]];
    // A missing page has no revisions; bail out instead of throwing
    var revision = page && page.revisions && page.revisions[0];
    var leadText = revision && revision.slots && revision.slots.main
                   && revision.slots.main["*"];
    if (typeof leadText !== "string") {
        return;
    }

    // Only first sentence: from the first bold marker to the first 。
    var introSearch = leadText.match(/'''.*?。/);
    if (!introSearch) {
        return;
    }
    var wikitext = introSearch[0].replace(/\{\{.*?\}\}/g, "{{}}"); // Remove templates

    var kanji = $("#kanjiInfo").text();
    // Escape each character on its own and only then join with an optional
    // space (to account for spaces in the jawiki title). Inserting the
    // optional space into an already-escaped string would separate a
    // backslash from the metacharacter it escapes.
    var kanjiPattern = kanji.split("").map(function (ch) {
        return ch.replace(/[\\{}()|.?*+\-^$\[\]]/g, "\\$&");
    }).join(" ?");
    // Add kanji to regex to make sure we're not getting the reading of some
    // other term
    var leadKanaRe = new RegExp(kanjiPattern + ".*?" + leadRe);

    console.log("showKanji-dev.js: lead: " + wikitext);
    console.log("showKanji-dev.js: kanji: " + kanji);
    console.log("showKanji-dev.js: regex: " + leadKanaRe);

    // Expect the full match plus exactly one capture group (the kana)
    var kanaSearch = wikitext.match(leadKanaRe);
    if (!kanaSearch || kanaSearch.length !== 2) {
        return;
    }

    // Rm trailing characters
    var kana = kanaSearch[1].replace(/[・、]$/, "");

    displayKana(kana);
    $("#kanjiInfo rt").addClass("kanjiInfo-jawiki");
}

// Adds the kana reading as an <rt> annotation to the <ruby> element inside
// #kanjiInfo.
//
// kana: the reading to display (plain text, not markup)
function displayKana(kana) {
    // The reading comes from Wikidata or from scraped jawiki wikitext, so it
    // is set as text content rather than concatenated into an HTML string
    $("#kanjiInfo ruby").append($("<rt>").text(kana));
}

// Wikidata item id of the current page as reported by mw.config; setup()
// stops early when this is null
var wikidataId = mw.config.get( 'wgWikibaseItemId' );
// Regexes
// Matches text made up solely of ASCII letters, digits, the listed
// punctuation and spaces; displayKanji hides such labels
var latinOnlyRe = /^[A-Za-z0-9\-.?!,:;@#$%&+=*'" ]+$/;
// Stolen from https://www.wikidata.org/wiki/Property:P1814#P1793
// Character-class fragment (not a full pattern) of digits, punctuation and
// spaces; it is spliced between "[" and "]" in the patterns below
var extrasRe = "0-9「」・、  \\-.?!!~〜×";
// Whole string is hiragana, katakana, the prolonged sound mark and extras
var kanaOnlyRe = new RegExp("^[ぁ-ゔァ-ヴー" + extrasRe + "]+$");
// Whole string is hiragana (ASCII letters and extras also allowed)
var hiraganaOnlyRe = new RegExp("^[ぁ-ゔーA-Za-z" + extrasRe + "]+$");
// Whole string is katakana (ASCII letters and extras also allowed)
var katakanaOnlyRe = new RegExp("^[ァ-ヴーA-Za-z" + extrasRe + "]+$");
// Pattern source string, not a RegExp: parseJaLead prefixes it with the title
// before compiling. After a ''' it skips lazily to a full-width opening
// parenthesis and captures the run of kana/extras characters that follows.
var leadRe = "'''.*?(([ぁ-ゔァ-ヴー" + extrasRe + "]+)"; // brittle
// Hand setup to jQuery so it runs once the document is ready
$(setup);