User:Polygnotus/DuplicateReferences.js: Difference between revisions

Content deleted Content added
No edit summary
No edit summary
 
(68 intermediate revisions by the same user not shown)
Line 1:
//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest
 
// <nowiki>
mw.loader.using(['mediawiki.util'], function () {
$(document).ready(function () {
 
console.log("Script started");
const DEBUG = false;
 
if ((mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/dupreftest') || mw.config.get('wgAction') !== 'view') {
/**
 * Write diagnostic output to the console, but only when DEBUG is enabled.
 *
 * Every message is prefixed with '[DuplicateReferences]' so the script's
 * output can be told apart from other console noise.
 *
 * @param {...*} args - Values forwarded verbatim to console.log.
 */
function debug(...args) {
    // Nothing may be logged outside this gate; otherwise every call would
    // print even when DEBUG is false.
    if (DEBUG) {
        console.log('[DuplicateReferences]', ...args);
    }
}
 
if (
mw.config.get('wgAction') !== 'view' ||
mw.config.get('wgDiffNewId') ||
mw.config.get('wgDiffOldId') ||
(mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')
) {
debug("Not the correct page or action, script terminated");
return;
}
 
debug("Page title:", document.title);
debug("URL:", window.___location.href);
 
/**
 * Scan the siblings that follow `element` for the references container.
 *
 * @param {Element} element - Node whose following element siblings are inspected.
 * @returns {Element|null} The first later sibling that is a <div> carrying the
 *     `reflist` or `mw-references-wrap` class, or null when there is none.
 */
function findNextReflistDiv(element) {
    for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
        // Only <div> siblings qualify; anything else is stepped over.
        if (sibling.tagName.toLowerCase() !== 'div') {
            continue;
        }
        const classes = sibling.classList;
        if (classes.contains('reflist') || classes.contains('mw-references-wrap')) {
            return sibling;
        }
    }
    return null;
}
 
const referencesHeader = document.querySelector("h2#References");
if (!referencesHeader) {
debug("References heading not found, script terminated");
return;
}
console.log("Page title:", document.title);
console.log("URL:", window.___location.href);
 
letconst referencesHeadingcontainerDiv = documentreferencesHeader.getElementByIdclosest("Referencesdiv");
if (!referencesHeadingcontainerDiv) {
console.logdebug("ReferencesContainer headingdiv not found, script terminated");
return;
}
 
const reflistDiv = findNextReflistDiv(containerDiv);
if (!reflistDiv) {
debug("Reflist div not found, script terminated");
return;
}
 
const referencesList = reflistDiv.querySelector('ol.references');
if (!referencesList) {
debug("ol.references not found within reflist div");
return;
}
 
// Inject the stylesheet used to highlight duplicate citations and to lay out
// the summary table (the table is hidden at widths of 768px and below).
// Colours must be valid hex values: a 12-digit value makes the browser drop
// the whole declaration.
const style = document.createElement('style');
style.textContent = `
li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}
.duplicate-citation-highlight { background-color: #e1eeff; }
.duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }
.duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }
.mw-collapsible-toggle { font-weight: normal; float: right; }
.duplicate-references-table { width: 100%; }
@media only screen and (max-width: 768px) {
.duplicate-references-table { display: none; }
}
`;
document.head.appendChild(style);
 
function addDuplicateCitationsTemplate(linkElement) {
let parentDiv = referencesHeading.closest("div");
debug("Adding duplicate citations template");
let newParagraph = document.createElement("p");
showLoading(linkElement);
newParagraph.style.color = "red";
function addDuplicateCitationsTemplate() {
console.log("Adding duplicate citations template");
var api = new mw.Api();
var pageTitle = mw.config.get('wgPageName');
 
let duplicateInfo = getDuplicateInfo();
 
// Get current date
const currentDate = new Date();
const monthNames = ["January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
];
const currentMonth = monthNames[currentDate.getMonth()];
const currentYear = currentDate.getFullYear();
const dateParam = `|date=${currentMonth} ${currentYear}`;
 
api.get({
Line 44 ⟶ 100:
rvslots: 'main',
formatversion: 2
}).then(function (data) {
var page = data.query.pages[0];
var content = page.revisions[0].slots.main.content;
 
// AddCreate the templatereason at the top of the pagestring
varlet newContentreason = '{{DuplicateDuplicateReferences citations}}script detected:\n\n' + content;
 
// Create the edit summary.
let summary = '+{{Duplicate citations}}';
if (duplicateInfo.length > 0) {
summaryduplicateInfo.forEach((info) +=> ': ';{
duplicateInfo reason += `* ${info.forEach(url} (refs: ${info, index).refs.map(r => {r.number).join(', ')})\n\n`;
summary += `${info.url} (refs: ${info.refs.join(', ')})`;
if (index < duplicateInfo.length - 1) {
summary += '; ';
}
});
}
 
// MakeCreate the edittemplate to insert
const templateToInsert = `{{Duplicated citations|reason=${reason}${dateParam}}}\n`;
 
// Use Morebits to handle the template insertion
const wikitextPage = new Morebits.wikitext.page(content);
// Define templates that should come before the duplicated citations template
const precedingTemplates = [
'short description',
'displaytitle',
'lowercase title',
'italic title',
'about',
'redirect',
'distinguish',
'for',
'Featured list',
'Featured article',
'Good article',
'Other uses',
'Redirect2',
'Use mdy dates',
'Use dmy dates',
'Use American English',
'Use British English'
];
 
// Insert the template after the specified templates
// The third parameter is flags (default 'i' for case-insensitive)
// The fourth parameter can include pre-template content like HTML comments
wikitextPage.insertAfterTemplates(templateToInsert, precedingTemplates, 'i', ['<!--[\\s\\S]*?-->']);
var newContent = wikitextPage.getText();
 
let summary = `Tagged [[WP:DUPREF|duplicate citations]] using [[User:Polygnotus/DuplicateReferences|DuplicateReferences]]`;
 
return api.postWithToken('csrf', {
action: 'edit',
Line 70 ⟶ 154:
summary: summary
});
}).then(function () {
mw.notifyshowSuccess('Successfully added the Duplicate citations template!'linkElement);
//setTimeout(function Reload() the page to show the changes{
___location.reload();
}, 100); // Reload after 0.catch(function(error)1 {second
}).catch(function (error) {
console.error('Error:', error);
showError(linkElement);
mw.notify('Failed to add the template. See console for details.', {type: 'error'});
});
}
 
/**
 * Replace the contents of `element` with a "Working..." status marker.
 *
 * @param {HTMLElement} element - Element whose markup is overwritten.
 */
function showLoading(element) {
    element.innerHTML = '<sup><small>[ Working... ]</small></sup>';
}
const referencesHeading = document.getElementById("References");
 
if (!referencesHeading) {
/**
 * Replace the contents of `element` with a "Done" status marker.
 *
 * @param {HTMLElement} element - Element whose markup is overwritten.
 */
function showSuccess(element) {
    element.innerHTML = '<sup><small>[ Done ]</small></sup>';
}
 
/**
 * Replace the contents of `element` with an "Error" status marker.
 *
 * @param {HTMLElement} element - Element whose markup is overwritten.
 */
function showError(element) {
    const errorMarkup = '<sup><small>[ Error ]</small></sup>';
    element.innerHTML = errorMarkup;
}
 
/**
 * Recursively collect the visible text of an element.
 *
 * Text nodes are trimmed. Element children whose computed style is
 * `display: none` or `visibility: hidden` are skipped together with their
 * descendants. Non-empty fragments are joined with a single space, so
 * whitespace-only text nodes and empty elements cannot add extra spaces
 * that would distort later text comparisons.
 *
 * @param {Element} element - Root whose child nodes are walked.
 * @returns {string} The visible text, or '' when there is none.
 */
function getVisibleText(element) {
    const pieces = [];
    for (const node of element.childNodes) {
        if (node.nodeType === Node.TEXT_NODE) {
            const chunk = node.textContent.trim();
            if (chunk) {
                pieces.push(chunk);
            }
        } else if (node.nodeType === Node.ELEMENT_NODE) {
            // Skip hidden elements
            const style = window.getComputedStyle(node);
            if (style.display !== 'none' && style.visibility !== 'hidden') {
                const nested = getVisibleText(node);
                if (nested) {
                    pieces.push(nested);
                }
            }
        }
    }
    return pieces.join(' ');
}
 
let currentElement = referencesHeading.nextElementSibling;
/**
 * Compute the Levenshtein (edit) distance between two strings.
 *
 * Fills a (b.length + 1) x (a.length + 1) matrix in which cell [i][j] holds
 * the distance between the first i characters of `b` and the first j
 * characters of `a`. The comparison is case-sensitive.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Minimum number of single-character insertions, deletions
 *     and substitutions needed to turn one string into the other.
 */
function calculateLevenshteinDistance(a, b) {
    debug("Comparing:");
    debug("Text 1:", a);
    debug("Text 2:", b);

    // If either side is empty the distance is the other side's length.
    if (a.length === 0) return b.length;
    if (b.length === 0) return a.length;

    const matrix = [];

    // Increment along the first column of each row
    for (let i = 0; i <= b.length; i++) {
        matrix[i] = [i];
    }

    // Increment each column in the first row
    for (let j = 0; j <= a.length; j++) {
        matrix[0][j] = j;
    }

    // Fill in the rest of the matrix
    for (let i = 1; i <= b.length; i++) {
        for (let j = 1; j <= a.length; j++) {
            if (b.charAt(i - 1) === a.charAt(j - 1)) {
                matrix[i][j] = matrix[i - 1][j - 1];
            } else {
                matrix[i][j] = Math.min(
                    matrix[i - 1][j - 1] + 1, // substitution
                    matrix[i][j - 1] + 1, // insertion
                    matrix[i - 1][j] + 1 // deletion
                );
            }
        }
    }

    debug("Levenshtein distance:", matrix[b.length][a.length]);
    return matrix[b.length][a.length];
}
 
/**
 * Convert an edit distance into a rounded similarity percentage string.
 *
 * @param {number} distance - Edit distance between the two texts.
 * @param {number} maxLength - Length of the longer of the two texts.
 * @returns {string} Similarity rounded to a whole number, followed by '%'.
 */
function calculateSimilarityPercentage(distance, maxLength) {
    // Two empty texts are identical; without this guard 0/0 would produce
    // NaN and the caller would display "NaN%".
    const similarity = maxLength === 0
        ? 100
        : ((maxLength - distance) / maxLength) * 100;
    debug("Similarity percentage:", similarity.toFixed(2) + "%");
    return `${Math.round(similarity)}%`;
}
 
function getDuplicateInfo() {
debug("Getting duplicate info");
 
const duplicates = [];
const urlMap = new Map();
const referenceItems = Array.from(referencesList.children);
 
debug("Number of reference items:", referenceItems.length);
 
referenceItems.forEach((item, index) => {
constif refNumber(item.tagName.toLowerCase() === index + 1; // This is the correct reference'li') number{
console.log(`Processing reference item ${refNumber}`) const refId = item.id;
const spanrefNumber = item.querySelector('span.reference-text')index + 1;
if debug(!span)`Processing reference item ${refNumber} (${refId})`);
console.log(` No reference-text span found in item ${refNumber}`);
return;
}
const links = span.querySelectorAll('a');
console.log(` Number of links in this span: ${links.length}`);
 
let validLink = null; // Get the visible text of the entire reference item
for (let link of links)const {refText = getVisibleText(item);
constdebug(` url =Reference link.hreftext: ${refText}`);
 
const linkText = link.textContent.trim();
// Find the first valid link in the reference
ifconst links = item.querySelectorAll('a');
let validLink linkText !== "Archived" &&null;
for (let link of (!url.includes("wikipedia.org/wiki/") || url.includes("Special:BookSources")links) &&{
!const url = link.includes("_(identifier)")href;
 
) {
validLink// =Skip link;this reference if the URL doesn't contain 'http'
consoleif (!url.logincludes(`'http')) Valid link found: ${url}`);
break debug(` Skipping reference ${refNumber} - URL does not contain 'http'`);
return; // This 'return' is equivalent to 'continue' in a regular for loop
}
const linkText = link.textContent.trim();
 
if (
// (!url.includes("wikipedia.org/wiki/") || url.includes("Special:BookSources")) &&
linkText !== "Archived" &&
!url.includes("wikipedia.org") &&
!url.includes("_(identifier)") && // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
!url.startsWith("https://search.worldcat.org/") && // |issn= parameter in cite news
!url.startsWith("https://www.bbc.co.uk/news/live/") && // live articles get frequent updates
!url.startsWith("https://www.aljazeera.com/news/liveblog/") &&
!url.startsWith("https://www.nbcnews.com/news/world/live-blog/") &&
!url.startsWith("https://www.theguardian.com/world/live/") &&
!url.startsWith("https://www.nytimes.com/live/") &&
!url.startsWith("https://edition.cnn.com/world/live-news/") &&
!url.startsWith("https://www.timesofisrael.com/liveblog") &&
!url.startsWith("https://www.france24.com/en/live-news/") &&
!url.startsWith("https://books.google.com/") && //may be 2 different pages of the same book
!url.startsWith("https://archive.org/details/isbn_")
) {
validLink = link;
debug(` Valid link found: ${url}`);
break;
}
}
}
 
if (validLink) {
const url = validLink.href;
if (urlMap.has(url)) {
urlMap.get(url).push({id: refId, number: refNumber.toString(), text: refText});
console.log debug(` Duplicate found for URL: ${url}`);
} else {
urlMap.set(url, [{id: refId, number: refNumber, text: refText}]);
debug(` New URL added to map: ${url}`);
}
} else {
urlMap.setdebug(url,` [refNumber.toString()] No valid link found in this item`);
console.log(` New URL added to map: ${url}`);
}
} else {
console.log(` No valid link found in this item`);
}
});
Line 154 ⟶ 314:
urlMap.forEach((refs, url) => {
if (refs.length > 1) {
duplicates.push({// url,Calculate refsLevenshtein });distance for each pair of refs
for (let i = 0; i < refs.length - 1; i++) {
for (let j = i + 1; j < refs.length; j++) {
debug(`Comparing references ${refs[i].number} and ${refs[j].number}:`);
const distance = calculateLevenshteinDistance(refs[i].text, refs[j].text);
const maxLength = Math.max(refs[i].text.length, refs[j].text.length);
const similarity = calculateSimilarityPercentage(distance, maxLength);
refs[i].similarity = refs[i].similarity || {};
refs[i].similarity[refs[j].id] = similarity;
}
}
duplicates.push({url, refs});
}
});
 
console.logdebug("Number of duplicate sets found:", duplicates.length);
console.logdebug("Duplicate sets:", duplicates);
return duplicates;
}
 
/**
 * Build the collapsible table that lists every duplicated URL.
 *
 * The header row holds the title, a show/hide toggle and, unless a
 * `table.box-Duplicated_citations` element is already on the page, a link
 * that calls addDuplicateCitationsTemplate. Each following row holds a
 * "report false positive" icon, the URL, and one link per reference using
 * that URL. Every reference after the first is followed by its similarity to
 * the first one. Hovering or clicking a reference link highlights the
 * matching citations in the reference list.
 *
 * @param {Array<{url: string, refs: Array<{id: string, number: (number|string), text: string}>}>} duplicateInfo
 *     Duplicate sets: a URL and the references that share it.
 * @returns {HTMLTableElement} The assembled table, not yet attached to the document.
 */
function createCollapsibleTable(duplicateInfo) {
    const table = document.createElement('table');
    table.className = 'wikitable mw-collapsible duplicate-references-table';
    table.setAttribute('role', 'presentation');

    const tbody = document.createElement('tbody');
    table.appendChild(tbody);

    const headerRow = document.createElement('tr');
    const headerCell = document.createElement('td');
    headerCell.innerHTML = '<strong>Duplicate References</strong>';

    const toggleSpan = document.createElement('span');
    toggleSpan.className = 'mw-collapsible-toggle';
    toggleSpan.innerHTML = '[<a href="#" class="mw-collapsible-text">hide</a>]';
    headerCell.appendChild(toggleSpan);

    // Check if the {{Duplicated citations}} template is already present
    const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');

    // Only add the link if the template is not present
    if (!duplicatedCitationsTemplate) {
        // Add the "add {{duplicated citations}}" link to the header
        const addTemplateLink = document.createElement('a');
        addTemplateLink.textContent = ' add {{duplicated citations}} ';
        addTemplateLink.href = '#';
        // A regular function (not an arrow) so `this` is the clicked link.
        addTemplateLink.addEventListener('click', function (e) {
            e.preventDefault();
            addDuplicateCitationsTemplate(this);
        });
        headerCell.appendChild(addTemplateLink);
    }
    headerRow.appendChild(headerCell);
    tbody.appendChild(headerRow);

    const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

    duplicateInfo.forEach(({url, refs}) => {
        const row = document.createElement('tr');
        const cell = document.createElement('td');

        // Create report icon
        const reportIcon = document.createElement('a');
        reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${encodeURIComponent(`[[${pageTitle}]] ${url}`)}%20~~~~`;
        reportIcon.innerHTML = '<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="Report false positive" title="Report false positive" />';
        reportIcon.style.marginRight = '5px';
        cell.appendChild(reportIcon);

        const urlLink = document.createElement('a');
        urlLink.href = url;
        urlLink.textContent = url;
        urlLink.target = "_blank";
        urlLink.rel = "noopener noreferrer";

        cell.appendChild(urlLink);
        cell.appendChild(document.createTextNode(' in refs: '));

        // Similarity is always reported relative to the first reference.
        const originalRef = refs[0];
        refs.forEach((ref, index) => {
            const link = document.createElement('a');
            link.href = `#${ref.id}`;
            link.textContent = ref.number;
            cell.appendChild(link);

            // Add similarity information
            if (index > 0) {
                const similarity = calculateSimilarityPercentage(
                    calculateLevenshteinDistance(originalRef.text, ref.text),
                    Math.max(originalRef.text.length, ref.text.length)
                );
                const similarityInfo = document.createElement('span');
                similarityInfo.textContent = ` (${similarity})`;
                cell.appendChild(similarityInfo);
            }

            // Hover: the hovered reference gets the "hover" style, the other
            // members of the same duplicate set get the "highlight" style.
            link.addEventListener('mouseover', () => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        if (r.id === ref.id) {
                            citationElement.classList.add('duplicate-citation-hover');
                        } else {
                            citationElement.classList.add('duplicate-citation-highlight');
                        }
                    }
                });
            });
            link.addEventListener('mouseout', () => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.remove('duplicate-citation-hover');
                        citationElement.classList.remove('duplicate-citation-highlight');
                    }
                });
            });

            // Click: clear any earlier click highlight, then mark the whole
            // set. The default action is not prevented.
            link.addEventListener('click', () => {
                document.querySelectorAll('.duplicate-citation-clicked').forEach(el => {
                    el.classList.remove('duplicate-citation-clicked');
                });
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.add('duplicate-citation-clicked');
                    }
                });
            });

            if (index < refs.length - 1) {
                cell.appendChild(document.createTextNode(', '));
            }
        });

        row.appendChild(cell);
        tbody.appendChild(row);
    });

    return table;
}
 
/**
 * Entry point: look for duplicate references and, when any are found, insert
 * the summary table after `containerDiv` and wire up its show/hide toggle.
 *
 * The collapsed/expanded state is kept in localStorage under the key
 * 'duplicateReferencesTableState' and is restored on the next run.
 */
function checkDuplicateReferenceLinks() {
    debug("Checking for duplicate reference links");
    const duplicateInfo = getDuplicateInfo();

    if (duplicateInfo.length === 0) {
        debug("No duplicates found");
        return;
    }

    debug("Duplicates found, creating collapsible table");

    const table = createCollapsibleTable(duplicateInfo);
    containerDiv.after(table);

    // Set up collapsible functionality
    const toggleLink = table.querySelector('.mw-collapsible-toggle a');
    // Every row except the header row is what gets shown or hidden.
    const tableBody = $(table).find('tr:not(:first-child)');
    const storageKey = 'duplicateReferencesTableState';

    const setTableState = (isCollapsed) => {
        if (isCollapsed) {
            tableBody.hide();
            toggleLink.textContent = 'show';
        } else {
            tableBody.show();
            toggleLink.textContent = 'hide';
        }
        localStorage.setItem(storageKey, isCollapsed);
    };

    // Initialize state from localStorage (stored as the string 'true'/'false')
    const initialState = localStorage.getItem(storageKey) === 'true';
    setTableState(initialState);

    toggleLink.addEventListener('click', function (e) {
        e.preventDefault();
        const isCurrentlyCollapsed = tableBody.is(':hidden');
        setTableState(!isCurrentlyCollapsed);
    });
}
 
// Run the duplicate check; this comes after all function definitions above.
checkDuplicateReferenceLinks();
debug("Script execution completed");
});
});
// </nowiki>