User:Polygnotus/DuplicateReferences.js: Difference between revisions

Content deleted Content added
highlight on hover
No edit summary
 
(104 intermediate revisions by the same user not shown)
Line 1:
//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest
 
// <nowiki>
mw.loader.using(['mediawiki.util'], function () {
$(document).ready(function () {
 
// Set to true to print diagnostic messages to the browser console.
const DEBUG = false;

/**
 * Logs the given values to the console, prefixed with the script name,
 * but only while DEBUG is enabled.
 *
 * @param {...*} args - Values forwarded to console.log.
 */
function debug(...args) {
    if (DEBUG) {
        console.log('[DuplicateReferences]', ...args);
    }
}
 
if (
mw.config.get('wgAction') !== 'view' ||
mw.config.get('wgDiffNewId') ||
mw.config.get('wgDiffOldId') ||
(mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')
) {
debug("Not the correct page or action, script terminated");
return;
}
 
debug("Page title:", document.title);
debug("URL:", window.___location.href);
 
/**
 * Walks forward through the element siblings that follow `element` and
 * returns the first <div> carrying the `reflist` or `mw-references-wrap` class.
 *
 * @param {Element} element - Node whose following siblings are searched.
 * @returns {Element|null} The matching div, or null when no sibling matches.
 */
function findNextReflistDiv(element) {
    for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
        // Only <div> siblings are candidates; the class is checked afterwards.
        if (sibling.tagName.toLowerCase() !== 'div') {
            continue;
        }
        const classes = sibling.classList;
        if (classes.contains('reflist') || classes.contains('mw-references-wrap')) {
            return sibling;
        }
    }
    return null;
}
 
const referencesHeader = document.querySelector("h2#References");
if (!referencesHeader) {
debug("References heading not found, script terminated");
return;
}
 
letconst referencesHeadingcontainerDiv = documentreferencesHeader.getElementByIdclosest("Referencesdiv");
if (!referencesHeadingcontainerDiv) {
debug("Container div not found, script terminated");
return;
}
 
const reflistDiv = findNextReflistDiv(containerDiv);
if (!reflistDiv) {
debug("Reflist div not found, script terminated");
return;
}
 
const referencesList = reflistDiv.querySelector('ol.references');
if (!referencesList) {
debug("ol.references not found within reflist div");
return;
}
 
// Inject the stylesheet used by the script: the targeted-reference marker,
// the highlight/hover/clicked classes toggled from the results table, and the
// table layout itself (the table is hidden on screens up to 768px wide).
const style = document.createElement('style');
style.textContent = `
li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}
.duplicate-citation-highlight { background-color: #e1eeff; }
.duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }
.duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }
.mw-collapsible-toggle { font-weight: normal; float: right; }
.duplicate-references-table { width: 100%; }
@media only screen and (max-width: 768px) {
.duplicate-references-table { display: none; }
}
`;
document.head.appendChild(style);
 
function addDuplicateCitationsTemplate(linkElement) {
let parentDiv = referencesHeading.closest("div");
debug("Adding duplicate citations template");
let newParagraph = document.createElement("p");
showLoading(linkElement);
newParagraph.style.color = "red";
function addDuplicateCitationsTemplate() {
var api = new mw.Api();
var pageTitle = mw.config.get('wgPageName');
 
// First, get the duplicate information
let duplicateInfo = getDuplicateInfo();
 
// Get current date
const currentDate = new Date();
const monthNames = ["January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
];
const currentMonth = monthNames[currentDate.getMonth()];
const currentYear = currentDate.getFullYear();
const dateParam = `|date=${currentMonth} ${currentYear}`;
 
api.get({
Line 36 ⟶ 100:
rvslots: 'main',
formatversion: 2
}).then(function (data) {
var page = data.query.pages[0];
var content = page.revisions[0].slots.main.content;
 
// AddCreate the templatereason at the top of the pagestring
varlet newContentreason = '{{DuplicateDuplicateReferences citations}}script detected:\n\n' + content;
 
// Create the edit summary
let summary = '+{{Duplicate citations}}';
if (duplicateInfo.length > 0) {
summaryduplicateInfo.forEach((info) +=> ': ';{
duplicateInfo reason += `* ${info.forEach(url} (refs: ${info, index).refs.map(r => {r.number).join(', ')})\n\n`;
summary += `${info.url} (refs: ${info.refs.join(', ')})`;
if (index < duplicateInfo.length - 1) {
summary += '; ';
}
});
}
 
// MakeCreate the edittemplate to insert
const templateToInsert = `{{Duplicated citations|reason=${reason}${dateParam}}}\n`;
 
// Use Morebits to handle the template insertion
const wikitextPage = new Morebits.wikitext.page(content);
// Define templates that should come before the duplicated citations template
const precedingTemplates = [
'short description',
'displaytitle',
'lowercase title',
'italic title',
'about',
'redirect',
'distinguish',
'for',
'Featured list',
'Featured article',
'Good article',
'Other uses',
'Redirect2',
'Use mdy dates',
'Use dmy dates',
'Use American English',
'Use British English'
];
 
// Insert the template after the specified templates
// The third parameter is flags (default 'i' for case-insensitive)
// The fourth parameter can include pre-template content like HTML comments
wikitextPage.insertAfterTemplates(templateToInsert, precedingTemplates, 'i', ['<!--[\\s\\S]*?-->']);
var newContent = wikitextPage.getText();
 
let summary = `Tagged [[WP:DUPREF|duplicate citations]] using [[User:Polygnotus/DuplicateReferences|DuplicateReferences]]`;
 
return api.postWithToken('csrf', {
action: 'edit',
Line 62 ⟶ 154:
summary: summary
});
}).then(function () {
mw.notifyshowSuccess('Successfully added the Duplicate citations template!'linkElement);
//setTimeout(function Reload() the page to show the changes{
___location.reload();
}, 100); // Reload after 0.catch(function(error)1 {second
}).catch(function (error) {
console.error('Error:', error);
showError(linkElement);
mw.notify('Failed to add the template. See console for details.', {type: 'error'});
});
}
 
/**
 * Replaces the contents of `element` with a "[ Working... ]" status marker.
 *
 * @param {Element} element - Element whose markup is overwritten.
 */
function showLoading(element) {
    element.innerHTML = '<sup><small>[ Working... ]</small></sup>';
}

/**
 * Replaces the contents of `element` with a "[ Done ]" status marker.
 *
 * @param {Element} element - Element whose markup is overwritten.
 */
function showSuccess(element) {
    element.innerHTML = '<sup><small>[ Done ]</small></sup>';
}

/**
 * Replaces the contents of `element` with an "[ Error ]" status marker.
 *
 * @param {Element} element - Element whose markup is overwritten.
 */
function showError(element) {
    element.innerHTML = '<sup><small>[ Error ]</small></sup>';
}
 
/**
 * Recursively collects the visible text content of an element.
 *
 * Text nodes contribute their trimmed content. Element nodes are descended
 * into unless their computed style has `display: none` or
 * `visibility: hidden`. Every other node type is ignored. Each contribution
 * is followed by a single space and the final result is trimmed.
 *
 * @param {Node} element - Node whose child nodes are walked.
 * @returns {string} The concatenated visible text.
 */
function getVisibleText(element) {
    let text = '';
    for (const node of element.childNodes) {
        if (node.nodeType === Node.TEXT_NODE) {
            text += node.textContent.trim() + ' ';
        } else if (node.nodeType === Node.ELEMENT_NODE) {
            // Skip hidden elements
            const style = window.getComputedStyle(node);
            if (style.display !== 'none' && style.visibility !== 'hidden') {
                text += getVisibleText(node) + ' ';
            }
        }
    }
    return text.trim();
}
 
/**
 * Computes the Levenshtein (edit) distance between two strings using the
 * full dynamic-programming matrix: rows follow `b`, columns follow `a`.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Minimum number of single-character insertions,
 *     deletions and substitutions needed to turn one string into the other.
 */
function calculateLevenshteinDistance(a, b) {
    debug("Comparing:");
    debug("Text 1:", a);
    debug("Text 2:", b);

    // An empty string is as far away as the other string is long.
    if (a.length === 0) return b.length;
    if (b.length === 0) return a.length;

    const matrix = [];

    // Increment along the first column of each row
    for (let i = 0; i <= b.length; i++) {
        matrix[i] = [i];
    }

    // Increment each column in the first row
    for (let j = 0; j <= a.length; j++) {
        matrix[0][j] = j;
    }

    // Fill in the rest of the matrix
    for (let i = 1; i <= b.length; i++) {
        for (let j = 1; j <= a.length; j++) {
            if (b.charAt(i - 1) === a.charAt(j - 1)) {
                matrix[i][j] = matrix[i - 1][j - 1];
            } else {
                matrix[i][j] = Math.min(
                    matrix[i - 1][j - 1] + 1, // substitution
                    matrix[i][j - 1] + 1, // insertion
                    matrix[i - 1][j] + 1 // deletion
                );
            }
        }
    }

    debug("Levenshtein distance:", matrix[b.length][a.length]);
    return matrix[b.length][a.length];
}
 
/**
 * Converts an edit distance into a rounded similarity percentage string.
 *
 * @param {number} distance - Edit distance between the two texts.
 * @param {number} maxLength - Length of the longer of the two texts.
 * @returns {string} Similarity rounded to a whole number, followed by '%'.
 */
function calculateSimilarityPercentage(distance, maxLength) {
    // Two empty texts are identical; without this guard 0 / 0 yields "NaN%".
    const similarity = maxLength === 0
        ? 100
        : ((maxLength - distance) / maxLength) * 100;
    debug("Similarity percentage:", similarity.toFixed(2) + "%");
    return Math.round(similarity) + '%';
}
 
/**
 * Scans the children of `referencesList` and groups reference items by the
 * URL of their first usable link.
 *
 * For every <li> child the first link that passes the filters below is
 * taken as the reference's URL. A reference's number is its 1-based position
 * among all children of the list. Any URL shared by more than one reference
 * becomes a duplicate set; for each pair in a set the textual similarity is
 * stored on the earlier reference under `similarity[<id of later ref>]`.
 *
 * @returns {Array<{url: string, refs: Array<{id: string, number: number, text: string, similarity?: Object}>}>}
 *     One entry per URL used by two or more references.
 */
function getDuplicateInfo() {
    debug("Getting duplicate info");

    // URL prefixes that are never treated as the reference's source link.
    const ignoredUrlPrefixes = [
        "https://search.worldcat.org/", // |issn= parameter in cite news
        "https://www.bbc.co.uk/news/live/", // live articles get frequent updates
        "https://www.aljazeera.com/news/liveblog/",
        "https://www.nbcnews.com/news/world/live-blog/",
        "https://www.theguardian.com/world/live/",
        "https://www.nytimes.com/live/",
        "https://edition.cnn.com/world/live-news/",
        "https://www.timesofisrael.com/liveblog",
        "https://www.france24.com/en/live-news/",
        "https://books.google.com/", // may be 2 different pages of the same book
        "https://archive.org/details/isbn_"
    ];

    const duplicates = [];
    const urlMap = new Map();
    const referenceItems = Array.from(referencesList.children);

    debug("Number of reference items:", referenceItems.length);

    referenceItems.forEach((item, index) => {
        if (item.tagName.toLowerCase() === 'li') {
            const refId = item.id;
            const refNumber = index + 1;
            debug(`Processing reference item ${refNumber} (${refId})`);

            // Get the visible text of the entire reference item
            const refText = getVisibleText(item);
            debug(` Reference text: ${refText}`);

            // Find the first valid link in the reference
            const links = item.querySelectorAll('a');
            let validLink = null;
            for (const link of links) {
                const url = link.href;

                // Skip this reference if the URL doesn't contain 'http'
                if (!url.includes('http')) {
                    debug(` Skipping reference ${refNumber} - URL does not contain 'http'`);
                    // Leaves the forEach callback, i.e. moves on to the next reference item.
                    return;
                }
                const linkText = link.textContent.trim();

                if (
                    linkText !== "Archived" &&
                    !url.includes("wikipedia.org") &&
                    !url.includes("_(identifier)") && // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
                    !ignoredUrlPrefixes.some((prefix) => url.startsWith(prefix))
                ) {
                    validLink = link;
                    debug(` Valid link found: ${url}`);
                    break;
                }
            }

            if (validLink) {
                const url = validLink.href;
                if (urlMap.has(url)) {
                    urlMap.get(url).push({id: refId, number: refNumber, text: refText});
                    debug(` Duplicate found for URL: ${url}`);
                } else {
                    urlMap.set(url, [{id: refId, number: refNumber, text: refText}]);
                    debug(` New URL added to map: ${url}`);
                }
            } else {
                debug(` No valid link found in this item`);
            }
        }
    });

    urlMap.forEach((refs, url) => {
        if (refs.length > 1) {
            // Calculate Levenshtein distance for each pair of refs
            for (let i = 0; i < refs.length - 1; i++) {
                for (let j = i + 1; j < refs.length; j++) {
                    debug(`Comparing references ${refs[i].number} and ${refs[j].number}:`);
                    const distance = calculateLevenshteinDistance(refs[i].text, refs[j].text);
                    const maxLength = Math.max(refs[i].text.length, refs[j].text.length);
                    const similarity = calculateSimilarityPercentage(distance, maxLength);
                    refs[i].similarity = refs[i].similarity || {};
                    refs[i].similarity[refs[j].id] = similarity;
                }
            }
            duplicates.push({url, refs});
        }
    });

    debug("Number of duplicate sets found:", duplicates.length);
    debug("Duplicate sets:", duplicates);
    return duplicates;
}
 
/**
 * Builds the "Duplicate References" table.
 *
 * The first row is a header holding the title, the show/hide toggle and,
 * when no `table.box-Duplicated_citations` element exists on the page, a link
 * that calls addDuplicateCitationsTemplate. Each following row describes one
 * duplicated URL: a false-positive report link, the URL itself, and one
 * link per reference that uses it. References after the first also show
 * their similarity to the first one.
 *
 * Hovering a reference link marks that reference with
 * `duplicate-citation-hover` and the others of the same set with
 * `duplicate-citation-highlight`; clicking marks the whole set with
 * `duplicate-citation-clicked`.
 *
 * @param {Array<{url: string, refs: Array<{id: string, number: number, text: string}>}>} duplicateInfo
 *     Duplicate sets as produced by getDuplicateInfo.
 * @returns {HTMLTableElement} The table, not yet attached to the document.
 */
function createCollapsibleTable(duplicateInfo) {
    const table = document.createElement('table');
    table.className = 'wikitable mw-collapsible duplicate-references-table';
    table.setAttribute('role', 'presentation');

    const tbody = document.createElement('tbody');
    table.appendChild(tbody);

    const headerRow = document.createElement('tr');
    const headerCell = document.createElement('td');
    headerCell.innerHTML = '<strong>Duplicate References</strong>';

    const toggleSpan = document.createElement('span');
    toggleSpan.className = 'mw-collapsible-toggle';
    toggleSpan.innerHTML = '[<a href="#" class="mw-collapsible-text">hide</a>]';
    headerCell.appendChild(toggleSpan);

    // Check if the {{Duplicated citations}} template is already present
    const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');

    // Only add the link if the template is not present
    if (!duplicatedCitationsTemplate) {
        // Add the "add {{duplicated citations}}" link to the header
        const addTemplateLink = document.createElement('a');
        addTemplateLink.textContent = ' add {{duplicated citations}} ';
        addTemplateLink.href = '#';
        addTemplateLink.addEventListener('click', function (e) {
            e.preventDefault();
            // `this` is the clicked link; it doubles as the status display.
            addDuplicateCitationsTemplate(this);
        });
        headerCell.appendChild(addTemplateLink);
    }
    headerRow.appendChild(headerCell);
    tbody.appendChild(headerRow);

    const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

    duplicateInfo.forEach(({url, refs}) => {
        const row = document.createElement('tr');
        const cell = document.createElement('td');

        // Create report icon
        const reportIcon = document.createElement('a');
        reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${encodeURIComponent(`[[${pageTitle}]] ${url}`)}%20~~~~`;
        reportIcon.innerHTML = '<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="Report false positive" title="Report false positive" />';
        reportIcon.style.marginRight = '5px';
        cell.appendChild(reportIcon);

        const urlLink = document.createElement('a');
        urlLink.href = url;
        urlLink.textContent = url;
        urlLink.target = "_blank";
        urlLink.rel = "noopener noreferrer";

        cell.appendChild(urlLink);
        cell.appendChild(document.createTextNode(' in refs: '));

        const originalRef = refs[0];
        refs.forEach((ref, index) => {
            const link = document.createElement('a');
            link.href = `#${ref.id}`;
            link.textContent = ref.number;
            cell.appendChild(link);

            // Add similarity information (relative to the first reference of the set)
            if (index > 0) {
                const similarity = calculateSimilarityPercentage(
                    calculateLevenshteinDistance(originalRef.text, ref.text),
                    Math.max(originalRef.text.length, ref.text.length)
                );
                const similarityInfo = document.createElement('span');
                similarityInfo.textContent = ` (${similarity})`;
                cell.appendChild(similarityInfo);
            }

            link.addEventListener('mouseover', () => {
                refs.forEach((r) => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        if (r.id === ref.id) {
                            citationElement.classList.add('duplicate-citation-hover');
                        } else {
                            citationElement.classList.add('duplicate-citation-highlight');
                        }
                    }
                });
            });

            link.addEventListener('mouseout', () => {
                refs.forEach((r) => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.remove('duplicate-citation-hover');
                        citationElement.classList.remove('duplicate-citation-highlight');
                    }
                });
            });

            link.addEventListener('click', () => {
                // Only one duplicate set is marked as clicked at a time.
                document.querySelectorAll('.duplicate-citation-clicked').forEach((el) => {
                    el.classList.remove('duplicate-citation-clicked');
                });
                refs.forEach((r) => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.add('duplicate-citation-clicked');
                    }
                });
            });

            if (index < refs.length - 1) {
                cell.appendChild(document.createTextNode(', '));
            }
        });

        row.appendChild(cell);
        tbody.appendChild(row);
    });

    return table;
}
 
/**
 * Entry point of the check: gathers the duplicate sets and, when there are
 * any, inserts the results table after the container div and wires up its
 * show/hide toggle. The collapsed state is kept in localStorage under
 * 'duplicateReferencesTableState' and restored on load.
 */
function checkDuplicateReferenceLinks() {
    debug("Checking for duplicate reference links");
    const duplicateInfo = getDuplicateInfo();

    if (!(duplicateInfo.length > 0)) {
        debug("No duplicates found");
        return;
    }

    debug("Duplicates found, creating collapsible table");

    const table = createCollapsibleTable(duplicateInfo);
    containerDiv.after(table);

    // Set up collapsible functionality: everything except the header row is toggled.
    const toggleLink = table.querySelector('.mw-collapsible-toggle a');
    const tableBody = $(table).find('tr:not(:first-child)');
    const storageKey = 'duplicateReferencesTableState';

    // Applies the requested state to the rows and the toggle label, then persists it.
    const setTableState = (isCollapsed) => {
        if (isCollapsed) {
            tableBody.hide();
        } else {
            tableBody.show();
        }
        toggleLink.textContent = isCollapsed ? 'show' : 'hide';
        localStorage.setItem(storageKey, isCollapsed);
    };

    // Initialize state from localStorage (anything but the string 'true' means expanded).
    setTableState(localStorage.getItem(storageKey) === 'true');

    toggleLink.addEventListener('click', (e) => {
        e.preventDefault();
        setTableState(!tableBody.is(':hidden'));
    });
}
 
// Run the duplicate check once; this is reached only if none of the guards above returned early.
checkDuplicateReferenceLinks();
debug("Script execution completed");
});
});
// </nowiki>