User:HarJIT/Scripts/unicategorise.js: Difference between revisions

Content deleted Content added
No edit summary
m Fix requested on WP:IANB
 
(46 intermediate revisions by 2 users not shown)
Line 1:
// <nowiki>
// Adjust chset-* style code chart colouration to match Unicode categories.
// Adds a button below the source editor.
//
// I am not selling this and make no guarantees of safety, fitness or that it won't mangle the content.
Line 5 ⟶ 7:
// clean up where necessary (and manually fix the more complicated cases, such as multiple mappings
// or PUA mappings).
//
// Furthermore, this loads JavaScript code from a third-party source in order to identify Unicode
// character categories. I cannot guarantee that this source will not be compromised. Proceed at your own risk.
//
// Usage: mw.loader.load("//en.wikipedia.org/w/index.php?action=raw&ctype=text/javascript&title=User:HarJIT/Scripts/unicategorise.js");
Line 20 ⟶ 25:
temp[temp.length] = this.substring(heading);
return temp;
};
 
// Python-style prefix test: true when this string begins with `prefix`.
String.prototype.startswith = function (prefix) {
    // Take exactly as many leading characters as the prefix has, then compare.
    const head = this.substring(0, prefix.length);
    return head == prefix;
};
 
// Python-style suffix test: true when this string finishes with `suffix`.
String.prototype.endswith = function (suffix) {
    // A negative start (suffix longer than the string) is clamped to 0 by
    // substring(), so the comparison then simply fails on length.
    const tailStart = this.length - suffix.length;
    const tail = this.substring(tailStart);
    return tail == suffix;
};
 
// Substring membership test: true when `needle` occurs anywhere in this string.
String.prototype.contains = function (needle) {
    const position = this.indexOf(needle);
    return position !== -1;
};
 
var ezh = null;
jQuery.get("https://cdn.rawgitjsdelivr.comnet/gh/slevithan/xregexp/7a168874@57f919a3ebc58ea2f56cb5b2391b6151483b1709/tools/output/categories.js", (b) => {
b = b.pysplit("=", 1)[1].trim();
if (b.endswith(";")) {
Line 45 ⟶ 50:
var yogh = {};
var doyogh = () => {
if (ezh === null) {
setTimeout(doyogh, 500);
return;
Line 56 ⟶ 61:
}
});
};
doyogh();
 
// Wikitext comment that fix() places in front of a cell's colour template when
// the colour could not be decided automatically, or when the code point field
// carried extra text, so that a human can review the cell.
var CHECKMS = "<!-- XXX: check colour -->";
// Note found on some EBCDIC pages. When the page text contains it, fix() uses the
// boxed "intl" colour for cells that had no colour template, and the button
// handler removes the note and switches to SUMMARY2.
var SHARED = "<!-- Note: Only the shared/invariant EBCDIC cells are colored; international glyphs are not. -->\n";
// Legend sentence that the button handler replaces (FROM -> TO) on pages carrying SHARED.
var FROM = "Invariant alphanumeric, punctuation, and control characters are shown in color.";
var TO = "Non-invariant characters are shown boxed.";
// Edit summaries written into the summary field: SUMMARY by default, SUMMARY2
// when the page carried the SHARED note.
var SUMMARY = "regenerate colour codes based on Unicode category ([[User:HarJIT/Scripts/unicategorise.js|script]])";
var SUMMARY2 = "box nationalised codes, and regenerate colour codes based on Unicode category ([[User:HarJIT/Scripts/unicategorise.js|script]])";
// Hand-written table header/footer rows; the button handler swaps an exact match of
// these for the {{chset-table-header|...}} and {{chset-table-footer}} templates.
var FAKEHEAD = "|-\n| width=\"4%\" |\n! width=\"6%\" | \u20140 || width=\"6%\" | \u20141\n! width=\"6%\" | \u20142 || width=\"6%\" | \u20143\n! width=\"6%\" | \u20144 || width=\"6%\" | \u20145\n! width=\"6%\" | \u20146 || width=\"6%\" | \u20147\n! width=\"6%\" | \u20148 || width=\"6%\" | \u20149\n! width=\"6%\" | \u2014A || width=\"6%\" | \u2014B\n! width=\"6%\" | \u2014C || width=\"6%\" | \u2014D\n! width=\"6%\" | \u2014E || width=\"6%\" | \u2014F";
var FAKEFOOT = "|-\n||\n!—0||—1||—2||—3||—4||—5||—6||—7||—8||—9||—A||—B||—C||—D||—E||—F";
 
/**
 * Regenerate the chset-color-* template on every table cell of a code chart so
 * that it reflects the Unicode general category of the cell's code point(s).
 *
 * The wikitext is cut at every "\n|{{chset-color-" and each piece (one cell) is
 * re-emitted with a freshly chosen colour. Cells that cannot be handled are
 * passed through unchanged, or are prefixed with CHECKMS for human review.
 *
 * Relies on names defined elsewhere in this script: the String.prototype
 * helpers pysplit/startswith/contains, the constants SHARED and CHECKMS, and
 * `yogh`, which is indexed by Unicode category name ("Co", "L", "N", "P", "S",
 * "C") and whose values are used as String.prototype.match patterns.
 *
 * NOTE(review): this body was reconstructed from a revision diff in which old
 * and new text were interleaved; the two spots marked below should be checked
 * against the live revision.
 *
 * @param {string} inp Wikitext of the page being edited.
 * @returns {string} Wikitext with the colour templates regenerated.
 */
var fix = function (inp) {
    var output = "";
    // We need *a* colour template to begin with in order to parse it, even if
    // it is the wrong one. For the places where they aren't used already.
    // Also some EBCDIC pages use lack of colour as a distinguishing mark, which
    // no longer shows up now that -letter is white.
    var iox = (inp.contains(SHARED)) ? ("color-intl-box") : ("color-intl");
    inp = inp.replace(/\n\|\s*\|?\{\{[Cc]hset-c(?=ell|trl)/g, "\n|{{chset-" + iox + "}}|{{chset-c");
    // Normalise template capitalisation, then cut into one piece per coloured cell.
    var doz = inp.split("{{Chset-").join("{{chset-").split("\n|{{chset-color-");
    output += doz[0];
    doz.slice(1).forEach((ii) => {
        // `ii` is kept untouched so the cell can be passed through verbatim;
        // `i` is progressively trimmed down to the text after the code point.
        var i = ii;
        var nombre = ""; // numeric suffix of the cell template: "", "3" or "4"
        var lead = i.substring(0, 40);
        if (i.startswith("undef")) {
            output += "\n|{{chset-color-" + ii;
            return; /* i.e. continue */
        } else if (i.startswith("intl}}|{{chset-cell3||<u>''")) {
            // Common in APL code pages, including EBCDIC ones.
            output += "\n|{{chset-color-letter" + ii.substring(4);
            return; /* i.e. continue */
        } else if (i.startswith("hangups}}|{{chset-cell3||<u>''")) {
            // What the hell (chset-color-hangups does not exist and afaik never did)
            output += "\n|{{chset-color-letter" + ii.substring(7);
            return; /* i.e. continue */
        } else if (lead.contains("l4|")) {
            nombre = "4";
        } else if (lead.contains("l3|")) {
            nombre = "3";
        } else if (!lead.contains("l|")) {
            // Not a chset-cell/chset-ctrl invocation we know how to parse.
            output += "\n|{{chset-color-" + ii;
            return; /* i.e. continue */
        }
        var layout = (lead.contains("chset-ctrl")) ? ("-ctrl") : ("-cell");
        // Start of the piece, i.e. the existing colour name and its suffixes.
        var iii = i.substring(0, 15);
        if (iii.startswith("esc")) {
            output += "\n|{{chset-color-" + ii;
            return; /* i.e. continue */
        }
        // Preserve an existing -box / -var highlight suffix.
        // NOTE(review): reconstructed as a nested conditional; confirm.
        var hilite = (iii.contains("-box")) ? ("-box") : (
            ((iii.contains("-var")) ? ("-var") : ("")));
        if (hilite === "-box" && iii.contains("-box|")) {
            // Carry over the parameter passed to the -box template.
            hilite += "|" + i.pysplit("-box|", 1)[1].pysplit("}", 1)[0];
        }
        var wlink = null;
        if (nombre === "" && i.pysplit("}}", 1)[1].trim().startswith("|[[")) {
            // MIK being _incredibly_ helpful
            // The cell template is wrapped in a wikilink: remember the link
            // target and skip ahead to the template's first parameter.
            i = i.pysplit("|[[", 1)[1];
            wlink = i.pysplit("|", 1)[0];
            i = i.pysplit("|", 2)[2];
        } else {
            i = i.pysplit("l" + nombre + "|", 1)[1];
        }
        var cpt = i.pysplit("|", 1)[0].pysplit("}}", 1)[0].trim();
        if (wlink !== null) {
            // MIK being _incredibly_ helpful indeed
            // Move the link inside the template, labelled with the character itself.
            var codep = String.fromCodePoint(parseInt(cpt, 16));
            i = i.split("}}]]").join("|[[" + wlink + "|" + codep + "]]}}");
        }
        if (cpt.trim().length === 0 && iii.startswith("ctrl")) {
            // Unmapped controls, common in articles about EBCDIC variants.
            cpt = "0000"; // Kludgy
        }
        // Reduce the code point field to a "/"-separated list of hex values:
        // drop <ref> tags, and turn the first "(", ")" and "?" into separators.
        var cpts = cpt.replace(/<ref[^>]*?(\/>|>[^<]*?<\/ref>)/g, " ");
        cpts = cpts.replace(/\(/, "/").replace(/\)/, " ").replace(/\?/, " ");
        // NOTE(review): the diff showed four chained string replaces whose
        // whitespace argument was lost in extraction; this assumes they
        // normalised runs of spaces / no-break spaces to one space. Confirm.
        cpts = cpts.replace(/[ \u00a0]+/g, " ");
        cpts = cpts.split("/");
        var colours = new Set();
        var checkmsg = "";
        cpts.forEach((hex) => {
            hex = hex.trim();
            if (hex.contains(" ")) {
                // Several values in one alternative: use the first, flag for review.
                hex = hex.pysplit(" ", 1)[0];
                checkmsg = CHECKMS;
            }
            var codept;
            if (hex.match(/^[0-9a-f]+$/i)) {
                codept = String.fromCodePoint(parseInt(hex, 16));
            } else {
                console.log(hex); return; /* i.e. continue */
            }
            if (codept.match(yogh["Co"])) {
                // Private use, which could mean:
                // (a) An end-user defined character in the source encoding (which would be -misc).
                // (b) A well-defined character without a standard Unicode mapping (e.g. the Apple
                //     logo in Macintosh, the Windows logo in Wingdings, the radical extender in
                //     x-mac-symbol, several characters in KPS 9566 and LMBCS...).
                // ==> Let a human be the judge here.
                console.log(hex); return; /* i.e. continue */
            } else if (codept.match(yogh["L"])) {
                if (iii.startswith("alpha")) {
                    // May as well keep it like that for now...
                    colours.add("-alpha");
                } else {
                    colours.add("-letter");
                }
            } else if (codept.match(yogh["N"])) {
                colours.add("-digit");
            } else if (codept.match(yogh["P"])) {
                if (iii.startswith("ext") && (parseInt(hex, 16) > 0x7F)) {
                    // May as well keep it like that for now...
                    colours.add("-ext-punct");
                } else {
                    colours.add("-punct");
                }
            } else if (codept.match(yogh["S"])) {
                colours.add("-graph");
            } else if (codept.match(yogh["C"])) {
                colours.add("-ctrl");
            } else {
                colours.add("-misc");
            }
        });
        colours = Array.from(colours);
        var colour = null;
        if (colours.length === 0) {
            // Nothing could be classified: keep the cell as it was, flagged.
            output += "\n|" + CHECKMS + "{{chset-color-" + ii;
            return; /* i.e. continue */
        } else if (colours.length === 1) {
            colour = colours[0];
        } else {
            // Alternatives disagree: keep the existing colour if it is one of
            // the candidates, otherwise leave the cell flagged for review.
            colours.forEach((col) => {
                if (iii.startswith(col.substring(1))) {
                    colour = col;
                }
            });
            if (colour === null) {
                output += "\n|" + CHECKMS + "{{chset-color-" + ii;
                return; /* i.e. continue */
            }
        }
        output += "\n|" + checkmsg + "{{chset-color" + colour + hilite + "}}|{{chset" + layout + nombre + "|" + i;
    });
    return output;
};
 
jQuery(() => {
Line 122 ⟶ 212:
var butn = document.createElement("input");
butn.setAttribute("type", "button");
butn.setAttribute("value", "Fix character classeschset-color");
var nxt = jQuery("#editpage-copywarn")[0];
nxt.parentNode.insertBefore(butn, nxt);
butn.onclick = () => {
var rprt = SUMMARY;
var txt = jQuery("#wpTextbox1")[0];
var vl = txt.value;
var fx = fix(vl);
if (fx != vl) {
if (vl.contains(SHARED)) {
fx = fx.split(SHARED).join("").split(FROM).join(TO);
rprt = SUMMARY2;
}
var tytl = jQuery("#firstHeading")[0].innerText.trim().substring("Editing ".length);
fx = fx.replace(FAKEHEAD, "{{chset-table-header|" + tytl + "}}");
fx = fx.replace(FAKEFOOT, "{{chset-table-footer}}");
txt.value = fx;
jQuery("#wpSummary")[0].value = rprt;
}
jQuery("#wpSummary")[0].value = "regenerate colour codes based on Unicode category ([[User:HarJIT/Scripts/unicategorise.js|script]])";
}
}
Line 140 ⟶ 239:
 
// End: [[m:w:User:HarJIT/Scripts/unicategorise.js]]
// </nowiki>