User:Polygnotus/DuplicateReferences.js: Difference between revisions

Content deleted Content added
No edit summary
No edit summary
 
(35 intermediate revisions by the same user not shown)
Line 1:
//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest
 
// <nowiki>
mw.loader.using(['mediawiki.util'], function () {
$(document).ready(function () {
 
// Flip to true to get diagnostic output in the browser console.
const DEBUG = false;

/**
 * Writes diagnostic output to the console, tagged with the script name.
 * Does nothing while DEBUG is false.
 *
 * @param {...*} parts - Values forwarded to console.log after the tag.
 */
function debug(...parts) {
    if (!DEBUG) {
        return;
    }
    console.log('[DuplicateReferences]', ...parts);
}
 
if (
debug("Script started");
mw.config.get('wgAction') !== 'view' ||
 
mw.config.get('wgDiffNewId') ||
if ((mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/dupreftest') || mw.config.get('wgAction') !== 'view') {
mw.config.get('wgDiffOldId') ||
debug("Not the correct page or action, script terminated");
(mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')
return;
) {
}
debug("Not the correct page or action, script terminated");
return;
}
 
debug("Page title:", document.title);
Line 23 ⟶ 29:
let nextElement = element.nextElementSibling;
while (nextElement) {
if (nextElement.tagName.toLowerCase() === 'div' &&
(nextElement.classList.contains('reflist') || nextElement.classList.contains('mw-references-wrap'))) {
return nextElement;
Line 70 ⟶ 76:
document.head.appendChild(style);
 
function addDuplicateCitationsTemplate(linkElement) {
debug("Adding duplicate citations template");
showLoading(linkElement);
var api = new mw.Api();
var pageTitle = mw.config.get('wgPageName');
 
let duplicateInfo = getDuplicateInfo();
 
// Get current date
const currentDate = new Date();
Line 93 ⟶ 100:
rvslots: 'main',
formatversion: 2
}).then(function (data) {
var page = data.query.pages[0];
var content = page.revisions[0].slots.main.content;
 
// Define the templates to check for
const templatesToCheck = [
'{{short description',
'{{DISPLAYTITLE',
'{{Lowercase title',
'{{Italic title',
'{{about',
'{{redirect'
];
 
// Find the position to insert the new template
let insertPosition = 0;
let lines = content.split('\n');
for (let i = 0; i < lines.length; i++) {
let line = lines[i].trim().toLowerCase();
if (templatesToCheck.some(template => line.startsWith(template.toLowerCase()))) {
insertPosition = i + 1;
} else if (line && !line.startsWith('{{') && !line.startsWith('__')) {
break;
}
}
 
// Create the reason string
let reason = 'DuplicateReferences script detected:\n\n';
if (duplicateInfo.length > 0) {
duplicateInfo.forEach((info, index) => {
reason += `* ${info.url} (refs: ${info.refs.map(r => r.number).join(', ')})\n\n`;
if (index < duplicateInfo.length - 1) {
reason += '; ';
}
});
}
 
// InsertCreate the new template with the reasonto parameterinsert
lines.splice(insertPosition,const 0,templateToInsert = `{{DuplicateDuplicated citations|reason=${reason}${dateParam}}}\n`);
 
var newContent = lines.join('\n');
// Use Morebits to handle the template insertion
const wikitextPage = new Morebits.wikitext.page(content);
// Define templates that should come before the duplicated citations template
const precedingTemplates = [
'short description',
'displaytitle',
'lowercase title',
'italic title',
'about',
'redirect',
'distinguish',
'for',
'Featured list',
'Featured article',
'Good article',
'Other uses',
'Redirect2',
'Use mdy dates',
'Use dmy dates',
'Use American English',
'Use British English'
];
 
// Insert the template after the specified templates
// The third parameter is flags (default 'i' for case-insensitive)
// The fourth parameter can include pre-template content like HTML comments
wikitextPage.insertAfterTemplates(templateToInsert, precedingTemplates, 'i', ['<!--[\\s\\S]*?-->']);
var newContent = wikitextPage.getText();
 
let summary = `+{{DuplicateTagged [[WP:DUPREF|duplicate citations]] using [[User:Polygnotus/DuplicateReferences|reason=${reason}${dateParam}}}DuplicateReferences]]`;
 
return api.postWithToken('csrf', {
Line 142 ⟶ 154:
summary: summary
});
}).then(function () {
mw.notifyshowSuccess('Successfully added the Duplicate citations template!'linkElement);
___location.reloadsetTimeout(function (); {
}) ___location.catchreload(function(error) {;
}, 100); // Reload after 0.1 second
}).catch(function (error) {
console.error('Error:', error);
showError(linkElement);
mw.notify('Failed to add the template. See console for details.', {type: 'error'});
});
}
 
/**
 * Replaces the element's content with the in-progress status marker.
 *
 * @param {Element} element - Element whose markup is overwritten.
 */
function showLoading(element) {
    const workingMarkup = '<sup><small>[ Working... ]</small></sup>';
    element.innerHTML = workingMarkup;
}
 
/**
 * Replaces the element's content with the "done" status marker.
 *
 * @param {Element} element - Element whose markup is overwritten.
 */
function showSuccess(element) {
    const doneMarkup = '<sup><small>[ Done ]</small></sup>';
    element.innerHTML = doneMarkup;
}
 
/**
 * Replaces the element's content with the error status marker.
 *
 * @param {Element} element - Element whose markup is overwritten.
 */
function showError(element) {
    const errorMarkup = '<sup><small>[ Error ]</small></sup>';
    element.innerHTML = errorMarkup;
}
 
Line 169 ⟶ 196:
 
function calculateLevenshteinDistance(a, b) {
console.logdebug("Comparing:");
console.logdebug("Text 1:", a);
console.logdebug("Text 2:", b);
 
if (a.length === 0) return b.length;
if (b.length === 0) return a.length;
Line 205 ⟶ 232:
}
 
console.logdebug("Levenshtein distance:", matrix[b.length][a.length]);
return matrix[b.length][a.length];
}
Line 211 ⟶ 238:
/**
 * Converts an edit distance into a similarity percentage string.
 *
 * @param {number} distance - Edit distance between the two compared texts.
 * @param {number} maxLength - Length of the longer of the two texts.
 * @returns {string} Similarity rounded to the nearest integer, followed by '%'.
 */
function calculateSimilarityPercentage(distance, maxLength) {
    // Two empty texts are identical; without this guard 0 / 0 would yield "NaN%".
    const similarity = maxLength === 0
        ? 100
        : ((maxLength - distance) / maxLength) * 100;
    // Log through the script's debug() helper so output respects the DEBUG switch.
    debug("Similarity percentage:", similarity.toFixed(2) + "%");
    return Math.round(similarity) + '%';
}
Line 217 ⟶ 244:
function getDuplicateInfo() {
debug("Getting duplicate info");
 
const referenceItems = referencesList.children;
debug("Number of reference items:", referenceItems.length);
const urlMap = new Map();
const duplicates = [];
const urlMap = new Map();
const referenceItems = Array.from(referencesList.children);
 
letdebug("Number refNumberof =reference 0items:", referenceItems.length);
 
for (let item of referenceItems) {
referenceItems.forEach((item, index) => {
if (item.tagName.toLowerCase() === 'li') {
refNumber++;
const refId = item.id;
const refNumber = index + 1;
debug(`Processing reference item ${refNumber} (${refId})`);
 
// Get the visible text of the entire reference item
const refText = getVisibleText(item);
Line 240 ⟶ 266:
for (let link of links) {
const url = link.href;
 
// Skip this reference if the URL doesn't contain 'http'
if (!url.includes('http')) {
debug(` Skipping reference ${refNumber} - URL does not contain 'http'`);
return; // This 'return' is equivalent to 'continue' in a regular for loop
}
const linkText = link.textContent.trim();
 
if (
// (!url.includes("wikipedia.org/wiki/") || url.includes("Special:BookSources")) &&
linkText !== "Archived" &&
(!url.includes("wikipedia.org/wiki/") || url.includes("Special:BookSources")) &&
!url.includes("_(identifier)") && // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
!url.startsWith("https://search.worldcat.org/") && // |issn= parameter in cite news
!url.startsWith("https://www.bbc.co.uk/news/live/") && // live articles get frequent updates
!url.startsWith("https://www.aljazeera.com/news/liveblog/") &&
!url.startsWith("https://www.nbcnews.com/news/world/live-blog/") &&
!url.startsWith("https://www.theguardian.com/world/live/") &&
!url.startsWith("https://www.nytimes.com/live/") &&
!url.startsWith("https://edition.cnn.com/world/live-news/") &&
!url.startsWith("https://www.timesofisrael.com/liveblog") &&
!url.startsWith("https://www.france24.com/en/live-news/") &&
!url.startsWith("https://books.google.com/") && //may be 2 different pages of the same book
!url.startsWith("https://archive.org/details/isbn_")
) {
validLink = link;
Line 266 ⟶ 310:
}
}
});
 
urlMap.forEach((refs, url) => {
Line 273 ⟶ 317:
for (let i = 0; i < refs.length - 1; i++) {
for (let j = i + 1; j < refs.length; j++) {
console.logdebug(`Comparing references ${refs[i].number} and ${refs[j].number}:`);
const distance = calculateLevenshteinDistance(refs[i].text, refs[j].text);
const maxLength = Math.max(refs[i].text.length, refs[j].text.length);
Line 281 ⟶ 325:
}
}
duplicates.push({ url, refs });
}
});
Line 290 ⟶ 334:
}
 
/**
 * Wires hover and click highlighting between one reference link in the table
 * and the citation elements of its duplicate group.
 *
 * @param {Element} link - The in-table link pointing at `ref`.
 * @param {{id: string}} ref - The reference this link represents.
 * @param {Array<{id: string}>} refs - All references in the same duplicate group.
 */
function attachCitationHighlighting(link, ref, refs) {
    // Runs `action` on every citation element of the group that exists in the page.
    const forEachCitation = (action) => {
        refs.forEach((r) => {
            const citationElement = document.getElementById(r.id);
            if (citationElement) {
                action(citationElement, r);
            }
        });
    };

    link.addEventListener('mouseover', () => {
        forEachCitation((citationElement, r) => {
            // The hovered reference gets its own class; its duplicates get the highlight class.
            const className = r.id === ref.id
                ? 'duplicate-citation-hover'
                : 'duplicate-citation-highlight';
            citationElement.classList.add(className);
        });
    });

    link.addEventListener('mouseout', () => {
        forEachCitation((citationElement) => {
            citationElement.classList.remove('duplicate-citation-hover');
            citationElement.classList.remove('duplicate-citation-highlight');
        });
    });

    link.addEventListener('click', () => {
        // Only one duplicate group is marked as clicked at a time.
        document.querySelectorAll('.duplicate-citation-clicked').forEach((el) => {
            el.classList.remove('duplicate-citation-clicked');
        });
        forEachCitation((citationElement) => {
            citationElement.classList.add('duplicate-citation-clicked');
        });
    });
}

/**
 * Builds the header row: title, collapse toggle and, when the page is not
 * already tagged, the "add {{duplicated citations}}" link.
 *
 * @returns {Element} The header <tr>.
 */
function createDuplicateTableHeaderRow() {
    const headerRow = document.createElement('tr');
    const headerCell = document.createElement('td');
    headerCell.innerHTML = '<strong>Duplicate References</strong>';

    const toggleSpan = document.createElement('span');
    toggleSpan.className = 'mw-collapsible-toggle';
    toggleSpan.innerHTML = '[<a href="#" class="mw-collapsible-text">hide</a>]';
    headerCell.appendChild(toggleSpan);

    // Check if the {{Duplicated citations}} template is already present;
    // only offer the tagging link when it is not.
    const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');
    if (!duplicatedCitationsTemplate) {
        const addTemplateLink = document.createElement('a');
        addTemplateLink.textContent = ' add {{duplicated citations}} ';
        addTemplateLink.href = '#';
        addTemplateLink.addEventListener('click', (e) => {
            e.preventDefault();
            // The link itself is handed over so status text can replace it.
            addDuplicateCitationsTemplate(addTemplateLink);
        });
        headerCell.appendChild(addTemplateLink);
    }

    headerRow.appendChild(headerCell);
    return headerRow;
}

/**
 * Builds one table row for a duplicated URL: a false-positive report icon,
 * the URL itself, and a link per reference with its similarity to the first.
 *
 * @param {string} url - The URL shared by the references.
 * @param {Array<{id: string, number: number, text: string}>} refs - References citing `url`.
 * @param {string} pageTitle - Current page title with underscores replaced by spaces.
 * @returns {Element} The populated <tr>.
 */
function createDuplicateRow(url, refs, pageTitle) {
    const row = document.createElement('tr');
    const cell = document.createElement('td');

    // Report icon: opens a preloaded new talk-page section naming this page and URL.
    const reportIcon = document.createElement('a');
    reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${encodeURIComponent(`[[${pageTitle}]] ${url}`)}%20~~~~`;
    reportIcon.innerHTML = '<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="Report false positive" title="Report false positive" />';
    reportIcon.style.marginRight = '5px';
    cell.appendChild(reportIcon);

    const urlLink = document.createElement('a');
    urlLink.href = url;
    urlLink.textContent = url;
    urlLink.target = "_blank";
    urlLink.rel = "noopener noreferrer";
    cell.appendChild(urlLink);
    cell.appendChild(document.createTextNode(' in refs: '));

    const originalRef = refs[0];
    refs.forEach((ref, index) => {
        const link = document.createElement('a');
        link.href = `#${ref.id}`;
        link.textContent = ref.number;
        cell.appendChild(link);

        // Every reference after the first shows how similar it is to the first one.
        if (index > 0) {
            const similarity = calculateSimilarityPercentage(
                calculateLevenshteinDistance(originalRef.text, ref.text),
                Math.max(originalRef.text.length, ref.text.length)
            );
            const similarityInfo = document.createElement('span');
            similarityInfo.textContent = ` (${similarity})`;
            cell.appendChild(similarityInfo);
        }

        attachCitationHighlighting(link, ref, refs);

        if (index < refs.length - 1) {
            cell.appendChild(document.createTextNode(', '));
        }
    });

    row.appendChild(cell);
    return row;
}

/**
 * Builds the collapsible table that lists every duplicated URL and the
 * references that cite it.
 *
 * @param {Array<{url: string, refs: Array<{id: string, number: number, text: string}>}>} duplicateInfo
 *     One entry per duplicated URL.
 * @returns {Element} The assembled <table> element (not yet attached to the page).
 */
function createCollapsibleTable(duplicateInfo) {
    const table = document.createElement('table');
    table.className = 'wikitable mw-collapsible duplicate-references-table';
    table.setAttribute('role', 'presentation');

    const tbody = document.createElement('tbody');
    table.appendChild(tbody);
    tbody.appendChild(createDuplicateTableHeaderRow());

    const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

    duplicateInfo.forEach(({ url, refs }) => {
        tbody.appendChild(createDuplicateRow(url, refs, pageTitle));
    });

    return table;
}
 
function checkDuplicateReferenceLinks() {
debug("Checking for duplicate reference links");
const duplicateInfo = getDuplicateInfo();
 
if (duplicateInfo.length > 0) {
debug("Duplicates found, creating collapsible table");
 
if (document.querySelector('table.box-Duplicated_citations') === null) {
const editSections = containerDiv.querySelectorAll('span.mw-editsection');
editSections.forEach(editSection => {
let spanBefore = document.createElement('span');
spanBefore.className = 'mw-editsection-bracket';
spanBefore.textContent = '[';
let addTemplateLink = document.createElement('a');
addTemplateLink.textContent = ' add {{duplicated citations}} ';
addTemplateLink.href = '#';
addTemplateLink.addEventListener('click', function(e) {
e.preventDefault();
addDuplicateCitationsTemplate();
});
let spanAfter = document.createElement('span');
spanAfter.className = 'mw-editsection-bracket';
spanAfter.textContent = ']';
editSection.appendChild(spanBefore);
editSection.appendChild(addTemplateLink);
editSection.appendChild(spanAfter);
});
}
const table = createCollapsibleTable(duplicateInfo);
containerDiv.after(table);
Line 445 ⟶ 488:
setTableState(initialState);
 
toggleLink.addEventListener('click', function (e) {
e.preventDefault();
const isCurrentlyCollapsed = tableBody.is(':hidden');
Line 454 ⟶ 497:
}
}
 
// Entry point: run the duplicate-reference check, then note completion in the debug log.
checkDuplicateReferenceLinks();
debug("Script execution completed");