User:PleaseStand/segregate-refs-dev.js

This is an old revision of this page, as edited by PleaseStand (talk | contribs) at 07:03, 6 February 2010 (fixed regex). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//<pre><nowiki>

/*  segregate-refs.js: A user script to simplify editing of articles
    using inline ref tags with the Cite.php extension to MediaWiki.
    
    Copyright (c) 2010, PleaseStand
    
    This software is licensed under these licenses:
    1.  Creative Commons Attribution-Share Alike 3.0 Unported License
        (see <http://creativecommons.org/licenses/by-sa/3.0/> for the text)
    2.  GNU Free Documentation License, version 1.3 or later.
        (see <http://www.gnu.org/copyleft/fdl.html> for the text)
    3.  Permission to use, copy, modify, and/or distribute this software for any
        purpose with or without fee is hereby granted, provided that the above
        copyright notice and this permission notice appear in all copies.
    
        THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
        WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
        MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
        ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
        WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
        ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
        OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    
    You may select the license(s) of your choice if you wish to copy, modify, or
    distribute this software. If you modify the software and do not wish to
    license your changes under one or more of the licenses, please remove
    the license(s) from the list above.
*/

/*global window, SegregateRefsJsL10n, SegregateRefsJsEmptyRefsWarningGiven,
wikEdUseWikEd, WikEdUpdateTextarea, WikEdUpdateFrame*/

// Translate these if necessary.
// Put translations in a separate file, changing the first line to:
// var SegregateRefsJsL10n = {
// Default (English) user-interface strings. A translation supplies an object
// with the same keys under the global name SegregateRefsJsL10n.
var SegregateRefsJsMsgs = {
  // Label and inline CSS of the button that starts segregation
  buttonText: "Segregate refs",
  buttonStyle: "background: #dfd;",
  // First part of every autogenerated ref name
  autoWord: "Auto",
  // Text of the confirm() dialog shown when the first occurrence of a named
  // ref is empty. Written as three paragraphs separated by blank lines; the
  // second paragraph deliberately keeps its trailing space.
  emptyRefsWarning: [
    "IMPORTANT: This page includes one or more named footnotes of which " +
      "the first occurrence(s) have no contents, which is not best " +
      "practice. This script does not harm such footnotes, however you " +
      "should be aware that this script ONLY CHECKS THE FIRST REF TAG it " +
      "finds for the actual citation or note text, and you must work " +
      "within this limitation. Any change to the empty ref tag will " +
      "replace the footnote entirely and leave the old note text hidden.",
    "BEFORE concluding from the footnotes list that a ref is in fact " +
      "empty, please manually check all identically-named ref tags for " +
      "the contents. ",
    "Do you acknowledge this limitation of the script? DO NOT CLICK OK " +
      "UNTIL YOU HAVE READ THE ABOVE INFORMATION."
  ].join("\n\n"),
  // Heading placed above the textarea that holds the segregated refs
  refsHeader: "Inline footnotes"
};

// Begin encapsulation (prevent interference with other scripts)
function SegregateRefsJs(){

// Semi-global variables (private to this script)
var editForm;     // the page's edit form element
var refsDiv;      // container this script adds to the edit form
var refsH2;       // heading shown above the refs textbox
var mainTextbox;  // the main wikitext textarea
var refsTextbox;  // textarea holding the segregated ref codes
var randPrefix;   // prefix of the ref names generated by the last segregation

// Choose the message table: a translation object, when one has been defined
// globally, takes the place of the built-in English strings.
var messages;
if(typeof SegregateRefsJsL10n == "object") {
    messages = SegregateRefsJsL10n;
} else {
    messages = SegregateRefsJsMsgs;
}

// Extend the string object with new methods
// Begin with the prefix "Ps" to avoid name clashes

// Add support for setting a slice of a string.
// (Only works with positive indices.)
// PsSetSlice(replacement, indexFrom[, indexTo])
//   Returns a new string in which the characters from indexFrom up to (not
//   including) indexTo are replaced by `replacement`. When indexTo is
//   omitted, everything from indexFrom to the end is replaced.
String.prototype.PsSetSlice = function(replacement, indexFrom, indexTo) {
    var head = this.slice(0, indexFrom);
    // No end index means there is no tail to keep
    var tail = (indexTo === undefined ? "" : this.slice(indexTo));
    return head + replacement + tail;
};

// Add support for unquoting from HTML-quoted form.
// PsHTMLUnquote()
//   `this` is the raw text of one attribute value exactly as it appeared in
//   the tag: either wrapped in matching quotes or unquoted. Returns the
//   decoded value (quotes removed, character references resolved).
String.prototype.PsHTMLUnquote = function() {
    // The text comes straight from page wikitext, so it must not be able to
    // create elements of its own. An unquoted value may contain "<" and ">",
    // which would otherwise end the <input> tag early and inject arbitrary
    // markup (e.g. an <img> with an onerror handler). Escaping the angle
    // brackets keeps everything inside the attribute; the browser decodes
    // them back, so the returned value is unchanged.
    var safeSource = String(this).replace(/</g, "&lt;").replace(/>/g, "&gt;");

    // Let's use the browser's functionality for the hard work,
    // since MediaWiki/PHP supports many different HTML entities.
    // (Note: innerHTML is not W3C-standard)
    var d = window.document.createElement("div");
    d.innerHTML = "<input value=" + safeSource + "></input>";
    return d.firstChild.value;
};

// Add support for quoting using HTML quotes. Chooses single quotes versus
// double quotes depending on which is shorter.
// PsHTMLQuote()
//   Returns this string as a quoted HTML attribute value, including the
//   surrounding quotes. Whichever quote style gives the shorter result is
//   used; double quotes win a tie.
String.prototype.PsHTMLQuote = function() {
    // Escape ampersands first so the entities added below are not re-escaped.
    // "<" and ">" are escaped as well: a raw ">" inside the value would be
    // taken as the end of the tag by parsers that do not track quotes, so a
    // name that was decoded from "&gt;" must be written back as "&gt;".
    var s = this.replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
    // Try both kinds of quotes
    var sQ = "'" + s.replace(/'/g, "&#39;") + "'";
    var dQ = '"' + s.replace(/"/g, "&quot;") + '"';
    // Choose the shorter, preferring double quotes if equal in length
    return (sQ.length < dQ.length ? sQ : dQ);
};

// OBJECTS

// RefScanner: Use for identifying ref tags in text. (No nested refs please)
// RefScanner(argWikiText)
//   Walks through wikitext and hands out the <ref> tags it contains, one per
//   call to getRef(). Both properties are public and are modified by callers:
//     wikiText      the text being scanned (callers splice replacements in)
//     refScanRegex  the global regex whose lastIndex marks the scan position
function RefScanner(argWikiText) {
    this.wikiText = argWikiText;
    // The tags listed below other than "ref" are there for an obvious reason.
    // NB: "references" is here to prevent out-of-line refs from being returned.
    //
    // After the tag name there must be either nothing or whitespace followed
    // by attributes - the same rule RefParser applies. Without that rule a
    // tag such as <refoo> is reported as a ref (and the match can run on to
    // some unrelated "/>" further down the page), which RefParser then
    // rejects with an "invalid ref" exception.
    this.refScanRegex = /<(nowiki|source|references|ref)(?:|\s(?:[^"']|"[^"]*"|'[^']*')*?)(?:\/>|(?:>[\s\S]*?<\/\1(?:|\s[^>]*)>))/gi;
}
RefScanner.prototype = {
    // Returns the complete text of the next ref tag at or after the current
    // scan position, or null when there are no more. Matches of the other
    // listed tags are consumed and skipped, so refs inside them are never
    // returned.
    getRef: function getRef() {
        var match;
        do {
            match = this.refScanRegex.exec(this.wikiText);
            if(!match) {
                return null;
            }
        } while(match[1].toLowerCase() != "ref");
        return match[0];
    }
};

// RefParser: Use for extracting attributes from ref tags
// RefParser(argWikiText)
//   Splits the text of exactly one ref tag into its parts. Throws
//   Error("invalid ref") when the text is not a single complete ref tag.
//     wikiText   the text that was given
//     parsedRef  regex match: [1] = attribute text, [2] = rest of the ref
//                (either "/>" or ">contents</ref>")
function RefParser(argWikiText) {
    this.wikiText = argWikiText;
    // Same shape as the scanner's regex, but anchored at both ends so the
    // whole string has to be one ref and nothing else.
    var wholeRefPattern = /^<ref(|\s(?:[^"']|"[^"]*"|'[^']*')*?)(\/>|(?:>[\s\S]*?<\/ref(?:|\s[^>]*)>))$/i;
    var match = wholeRefPattern.exec(this.wikiText);
    this.parsedRef = match;
    if(match === null) {
        throw new Error("invalid ref");
    }
}
RefParser.prototype = {
    // Returns an object mapping each attribute name (lower-cased) to its
    // unquoted value, or null when there is no parsed ref.
    getAttributes: function getAttributes() {
        if(!this.parsedRef) {
            return null;
        }
        // Matches one name=value pair per exec() call.
        var pairPattern = /\s([^\s=>]+)\s*=\s*("[^"]*"|'[^']*'|[^\s"']*)/g;
        var attributeText = this.parsedRef[1];
        var found = {};
        var pair;
        while((pair = pairPattern.exec(attributeText)) !== null) {
            found[pair[1].toLowerCase()] = pair[2].PsHTMLUnquote();
        }
        return found;
    }
};

// FUNCTIONS

// segregateRefs: Use for segregating refs from content.
// segregateRefs(argWikiText)
//   Replaces the first occurrence of every ref in the text with a short
//   <ref name=.../> placeholder and collects the full codes separately.
//   Refs without a name are given an autogenerated one first.
//
//   Returns false if the user declines the empty-refs warning, otherwise
//     { wikiText:   the text with short placeholders,
//       refCodes:   array of the full ref codes, in order of appearance,
//       randPrefix: the prefix used for autogenerated names }
//   Also stores the prefix in the script-wide randPrefix variable.
function segregateRefs(argWikiText) {
    // Ref names come from the page, so one may be called "hasOwnProperty";
    // never call that method through the lookup objects themselves.
    function hasOwn(obj, key) {
        return Object.prototype.hasOwnProperty.call(obj, key);
    }

    // Create a random prefix for autogenerated ref names (1296 possible
    // values). A prefix that already occurs in the page - for instance in
    // an autogenerated name kept from an earlier run - would make new names
    // clash with existing refs, so draw again in that case. The number of
    // attempts is bounded so this always terminates.
    var prefixChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    var randNo;
    var attemptsLeft = 100;
    do {
        randNo = Math.floor(Math.random() *
            (prefixChars.length * prefixChars.length));
        randPrefix = messages.autoWord +
            prefixChars.charAt(Math.floor(randNo / prefixChars.length)) +
            prefixChars.charAt(randNo % prefixChars.length) + "-";
    } while(argWikiText.indexOf(randPrefix) != -1 && --attemptsLeft > 0);

    // Variables for the main code
    var scanner = new RefScanner(argWikiText);
    var unnamedRefs = 0; var refNames = {}; var ref;
    var parser; var attributes; var refName; var refLong; var refShort;
    var refCodes = []; var refEmpty; var emptyRefsWarningGiven = false;

    // Disable the empty refs warning (see below) if the user has disabled it
    if(typeof SegregateRefsJsEmptyRefsWarningGiven != "undefined" &&
    SegregateRefsJsEmptyRefsWarningGiven) {
        emptyRefsWarningGiven = true;
    }

    while((ref = scanner.getRef())) {
        parser = new RefParser(ref);
        attributes = parser.getAttributes();
        // Does the ref have a name?
        // (Note: No matter how incorrect it seems, the empty string is
        // an acceptable ref name to the MediaWiki parser, as verified by
        // informal testing.)
        if(!hasOwn(attributes, "name")) {
            // Bad: it doesn't have one - create a name for it
            refName = randPrefix + (++unnamedRefs).toString(10);
            // Change the corresponding ref code
            refLong = "<ref name=" + refName.PsHTMLQuote() +
                parser.parsedRef[1] + parser.parsedRef[2];
        } else {
            // Good: it has a name.
            refName = attributes.name;
            refLong = ref;
            // Since this script only checks the first occurrence of a ref
            // for contents, inform the user of this limitation if it may
            // pose a problem.
            // A ref is empty when it is self-closing or when the closing tag
            // follows immediately. (Compare the whole remainder with "/>":
            // a ref with contents may legitimately end in "</ref />".)
            refEmpty = (parser.parsedRef[2] == "/>") ||
                (parser.parsedRef[2].slice(0, 3) == "></");
            if(!hasOwn(refNames, refName) && refEmpty &&
            !emptyRefsWarningGiven) {
                if(!window.confirm(messages.emptyRefsWarning)) {
                    return false;
                }
                emptyRefsWarningGiven = true;
            }
        }

        // Is the ref's name unique?
        if(!hasOwn(refNames, refName)) {
            // Yes: add it to the list of refs
            refNames[refName] = refCodes.length;
            refCodes[refCodes.length] = refLong;
            // Make a short code for the ref
            refShort = "<ref name=" + refName.PsHTMLQuote() + "/>";

        } else {
            // Otherwise leave the ref as-is in the original wikitext
            refShort = refLong;
        }
        // Replace the long code with the short code
        scanner.wikiText = scanner.wikiText.PsSetSlice(refShort,
            scanner.refScanRegex.lastIndex - ref.length,
            scanner.refScanRegex.lastIndex);
        // Update lastIndex accordingly, so scanning resumes right after the
        // replacement
        scanner.refScanRegex.lastIndex += refShort.length - ref.length;
    }
    return {
        wikiText: scanner.wikiText,
        refCodes: refCodes,
        randPrefix: randPrefix
        };
}

// integrateRefs: Use for inserting ref contents back into text
// integrateRefs(argWikiText, argRefText, randPrefix)
//   Puts full ref codes back into the text.
//     argWikiText  text containing short <ref name=.../> placeholders
//     argRefText   text containing the full ref codes
//     randPrefix   prefix that marks autogenerated ref names
//   For each name, the first full code found in argRefText replaces the
//   first ref with that name in argWikiText. An autogenerated name that is
//   used only once is removed again. Returns the resulting text.
function integrateRefs(argWikiText, argRefText, randPrefix) {

    // Ref names come from the page, so one may be called "hasOwnProperty";
    // never call that method through the lookup objects themselves.
    function hasOwn(obj, key) {
        return Object.prototype.hasOwnProperty.call(obj, key);
    }

    // A function to remove an autogenerated ref name (if possible).
    // Only a name attribute that comes first, in the form this script
    // writes it, is removed. An unquoted value ends at whitespace, ">" or
    // "/>" - it must never run on into the contents of the ref.
    function cleanRefLong(dirtyRef) {
        var cleanRegex = /^<(ref) name=(?:"[^"]*"|'[^']*'|[^\s"'>]*?(?=\/?>|\s))/i;
        return dirtyRef.replace(cleanRegex, "<$1");
    }

    // Variables for the main code
    var scanner; var ref; var parser; var attributes; var refCodes = {};
    var usageFreq = {}; var refLong;

    // First, we build an associative array of all the ref codes
    // that we might need to put back into the text.
    scanner = new RefScanner(argRefText);
    while((ref = scanner.getRef())) {
        parser = new RefParser(ref);
        attributes = parser.getAttributes();
        if(hasOwn(attributes, "name")) {
            // Only use the first ref with each name
            if(!hasOwn(refCodes, attributes.name)) {
                refCodes[attributes.name] = ref;
            }
        }
    }

    // Next, we build an associative array that holds the usage frequency
    // of every ref name used in text.
    scanner = new RefScanner(argWikiText);
    while((ref = scanner.getRef())) {
        parser = new RefParser(ref);
        attributes = parser.getAttributes();
        if(hasOwn(attributes, "name")) {
            if(!hasOwn(usageFreq, attributes.name)) {
                // We found a new name
                usageFreq[attributes.name] = 1;
            } else {
                // We already found this name
                usageFreq[attributes.name]++;
            }
        }
    }


    // Finally, we go through the text again and this time we insert the
    // ref codes where we need to, but only in the first place
    // a ref name appears.
    scanner = new RefScanner(argWikiText);
    while((ref = scanner.getRef())) {
        parser = new RefParser(ref);
        attributes = parser.getAttributes();
        if(hasOwn(attributes, "name")) {
            // Is this name on the replacement list?
            if(hasOwn(refCodes, attributes.name)) {
                // Is this name an autogenerated name?
                if(attributes.name.slice(0, randPrefix.length) == randPrefix) {
                    // Yes: is the name used multiple times?
                    if(usageFreq[attributes.name] > 1) {
                        // Multiple: the replacement code should be the same
                        // as that stored in the ref textbox.
                        refLong = refCodes[attributes.name];
                    } else {
                        // Single: replacement code must not include the name,
                        // at least not if the citation was untouched.
                        // (We don't want to add unnecessary autonames)
                        refLong = cleanRefLong(refCodes[attributes.name]);
                    }
                } else {
                    // No: the replacement code should be the same
                    // as that stored in the ref textbox.
                    // (We want to preserve all human-generated names)
                    refLong = refCodes[attributes.name];
                }

                // Replace the short code with the long code
                scanner.wikiText = scanner.wikiText.PsSetSlice(refLong,
                    scanner.refScanRegex.lastIndex - ref.length,
                    scanner.refScanRegex.lastIndex);
                // Update lastIndex accordingly, so scanning resumes right
                // after the inserted code
                scanner.refScanRegex.lastIndex += refLong.length - ref.length;

                // Delete the name from the replacement list so that later
                // refs with the same name are left as they are
                delete refCodes[attributes.name];
            }
        }
    }
    return scanner.wikiText;
}

// submitHandler: onsubmit handler installed on the edit form while the refs
// are segregated. Merges the refs back into the main textbox, removes the
// refs header and textbox, restores the previously saved onsubmit handler
// (this.PsOnsubmit) and runs it. Returns that handler's result, or true if
// there is none.
function submitHandler() {
    var submitEvent = arguments[0] || window.event;
    var integratedText;

    // Integrate BEFORE changing anything. If integration fails, the form
    // must not be submitted: the main textbox still holds only the short
    // placeholders, so saving it would drop every footnote's contents.
    // Cancel the submission, leave this handler installed so a later
    // attempt integrates again, and let the error surface.
    try {
        integratedText = integrateRefs(mainTextbox.value, refsTextbox.value,
            randPrefix);
    } catch(err) {
        if(submitEvent) {
            if(typeof submitEvent.preventDefault == "function") {
                submitEvent.preventDefault();
            } else {
                submitEvent.returnValue = false;
            }
        }
        throw err;
    }

    // Prevent double integration and restore the previous onsubmit value
    this.onsubmit = this.PsOnsubmit;

    // Update the textbox with the integrated text
    mainTextbox.value = integratedText;

    // We can delete the header and refs textbox now
    refsDiv.removeChild(refsH2);
    refsDiv.removeChild(refsTextbox);

    // Run any previously registered submit handler
    if(this.onsubmit) {
        return this.onsubmit.apply(this, arguments);
    }
    return true;
}

// refsButtonHandler: onclick handler of the "segregate refs" button
// (`this` is the button). Moves the refs out of the main textbox into a new
// textbox below it and arranges for them to be merged back on submit.
// Always returns false so the click never submits the form.
function refsButtonHandler() {
    // wikEd compatibility (frame -> textarea). This has to happen before
    // the textarea is read, otherwise segregation would work on stale text.
    if(typeof wikEdUseWikEd != "undefined" && wikEdUseWikEd) {
        WikEdUpdateTextarea();
    }

    // Do the actual segregation work
    var segFormat = segregateRefs(mainTextbox.value);
    if(!segFormat) {
        // The user declined the warning: nothing has been changed, and the
        // button stays so the script can be started again later.
        return false;
    }

    // Segregation succeeded, so the button should disappear
    this.parentNode.removeChild(this);

    // Save the random prefix for the integration step
    randPrefix = segFormat.randPrefix;

    // Update the textbox
    mainTextbox.value = segFormat.wikiText;

    // wikEd compatibility (textarea -> frame)
    if(typeof wikEdUseWikEd != "undefined" && wikEdUseWikEd) {
        WikEdUpdateFrame();
    }

    // Inline refs header
    refsH2 = window.document.createElement("h2");
    refsH2.appendChild(window.document.createTextNode(messages.refsHeader));

    // Inline refs textbox: one ref code per paragraph, half as tall as the
    // main textbox and equally wide
    refsTextbox = window.document.createElement("textarea");
    refsTextbox.id = "PsRefsTextbox";
    refsTextbox.value = segFormat.refCodes.join("\n\n");
    refsTextbox.rows = Math.floor(mainTextbox.rows / 2);
    refsTextbox.cols = mainTextbox.cols;

    // Add to document
    refsDiv.appendChild(refsH2);
    refsDiv.appendChild(refsTextbox);

    // Set up the submit handler (to integrate refs when done editing).
    // Remember the handler that was registered before - the onsubmit
    // property, not the form's submit() method - so that submitHandler can
    // restore and run it.
    editForm.PsOnsubmit = editForm.onsubmit;
    editForm.onsubmit = submitHandler;

    // Don't submit form
    return false;
}

// loadHandler: window.onload handler. Restores the load handler that was
// registered before this script, adds the "segregate refs" button when the
// page is an edit page, and then runs the restored handler.
function loadHandler() {
    // Restore the previous load handler
    var previousHandler = window.PsOnload;
    window.onload = previousHandler;

    // Only activate on edit pages, i.e. when both the edit form and the
    // main edit box are present
    var form = window.document.getElementById("editform");
    var textbox = window.document.getElementById("wpTextbox1");
    if(form && textbox) {
        editForm = form;
        mainTextbox = textbox;

        // Make the button
        var refsButton = window.document.createElement("input");
        refsButton.type = "button";
        refsButton.value = messages.buttonText;
        refsButton.setAttribute("style", messages.buttonStyle);
        refsButton.onclick = refsButtonHandler;

        // Add the refs div in front of the copyright warning
        refsDiv = window.document.createElement("div");
        refsDiv.appendChild(refsButton);
        editForm.insertBefore(refsDiv,
            window.document.getElementById("editpage-copywarn"));
    }

    // Run any previously registered load handler. This must happen on every
    // page, not just on edit pages, or scripts that registered their handler
    // earlier would silently stop working everywhere else.
    if(typeof previousHandler == "function") {
        previousHandler.apply(window, arguments);
    }
}

// Register load handler. Whatever handler is currently installed is saved in
// window.PsOnload first; loadHandler puts it back into window.onload when
// it runs.
window.PsOnload = window.onload;
window.onload = loadHandler;

} SegregateRefsJs();

//</nowiki></pre>