/**
*
* Citation Watchlist
* https://en.wikipedia.org/wiki/WP:WATCHCITE
*
*/
/**
* ==========================================================================
* Domain List Configuration
* ==========================================================================
*
* Citation Watchlist requires the following wiki pages to function:
*
* 1. Public Suffix List
* - A local copy of the public suffix list, used for ___domain parsing.
* - Copy the contents of:
* https://en.wikipedia.org/wiki/Wikipedia:Citation_Watchlist/Public_Suffix_List
* to a page on your own wiki.
* - Update the `publicSuffixList` variable below to reflect your page title.
*
* 2. List of Lists
* - A page linking to one or more ___domain list pages.
* - Format as a bullet list: "* [[Page Title]]" (space after asterisk).
* - Reference formatting example:
* https://en.wikipedia.org/wiki/Wikipedia:Citation_Watchlist/Lists
* - Update the `listOfLists` variable below accordingly.
*
* 3. Domain List Pages
* - One or more pages listing suspicious or noteworthy domains.
* - Each page must contain section headers that match the `indicators` config
* below (e.g., "==Warn==", "==Caution==").
* - Under each section, list domains in the format: "* example.com"
* - Do not use link formatting—just plain text.
*/
// Wiki page holding the local copy of the Public Suffix List (see header notes).
const publicSuffixList = "Wikipedia:Citation_Watchlist/Public_Suffix_List";
// Wiki page whose bullet list links to the individual ___domain list pages.
const listOfLists = "Wikipedia:Citation_Watchlist/Lists";
/**
* ==========================================================================
* Indicator Configuration
* ==========================================================================
*
* Defines metadata for ___domain indicators used in the watchlist UI.
* Each indicator is associated with a level of urgency and a unique symbol.
*
* Fields:
* - msg: Display label for the level (e.g., "Warning", "Caution").
* - emoji: Unicode character for the visual indicator (escaped as `\uXXXX`).
* - section: Must exactly match the section headers in the ___domain list pages.
* - priority: Higher values override lower ones for conflicting ___domain matches.
* Priority scale: 1 (lowest) to N (highest).
* - list: Defined as "new Set()" for all indicator types.
*
* If a ___domain appears in multiple lists, the one with the highest priority
* takes precedence.
*/
const indicators = {
  // Highest-priority indicator: domains under the "==Warn==" section.
  warning: {
    msg: "Warning",
    emoji: '\u2757', // heavy exclamation mark
    section: "==Warn==",
    priority: 3,
    list: new Set()
  },
  // Mid-priority indicator: domains under the "==Caution==" section.
  caution: {
    msg: "Caution",
    emoji: '\u270B', // raised hand
    section: "==Caution==",
    priority: 2,
    list: new Set()
  },
  // Lowest-priority positive indicator: domains under "==Inspect==".
  inspect: {
    msg: "Inspect",
    emoji: '\uD83D\uDD0E', // magnifying glass (surrogate pair)
    section: "==Inspect==",
    priority: 1,
    list: new Set()
  },
  // Synthetic indicator for URLs deleted in a diff; not sourced from any
  // list page (section: null) and exempt from priority resolution.
  removed: {
    msg: "Removed",
    emoji: '\u{1F5D1}', // wastebasket
    section: null,
    priority: -1,
    list: new Set()
  }
};
/**
* Citation Watchlist
*
* Highlights potentially questionable citations added in Wikipedia revisions,
* using predefined ___domain lists and a public suffix list to analyze diffs.
*
*
* Documentation: https://en.wikipedia.org/wiki/WP:WATCHCITE
*
* Author: James Hare under contract with Hacks/Hackers
* License: GNU General Public License v3.0 (GPL-3.0)
*
* @version 1.14
* @since 2025-06-27
*/
/**
* If you want to modify any part of the script below this point, please submit
* your edits to https://test.wikipedia.org/wiki/User:Harej/citation-watchlist.js
* so that your modifications can be tested.
*/
// Parsed public suffixes; populated by fetchPublicSuffixList() at startup.
let publicSuffixSet = new Set();
// Prefixes of all non-article namespaces (e.g. "Talk:", "User:"), with spaces
// replaced by underscores. Main (0) and Draft (118) are excluded so that
// isNotArticle() treats titles starting with any remaining prefix as
// outside the analyzed namespaces.
const namespaces = Object.entries(mw.config.get('wgFormattedNamespaces'))
.filter(([num, name]) => num !== '0' && num !== '118')
.map(([_, name]) => name.replace(/ /g, '_') + ':');
/**
 * Main entry point for Citation Watchlist.
 * Determines if the current page should be analyzed, fetches ___domain and suffix
 * lists, processes each change/revision in the recent changes or history page,
 * and triggers analysis to highlight questionable domains.
 */
async function analyzeView() {
  purgeExpiredCache();
  // Only run on special pages (-1), articles (0), and drafts (118).
  const ns = mw.config.get('wgNamespaceNumber');
  if (![-1, 0, 118].includes(ns)) {
    return;
  }
  publicSuffixSet = await fetchPublicSuffixList();
  if (publicSuffixSet.size === 0) {
    console.error('Public Suffix List loading failed');
    return;
  }
  console.log("Welcome to Citation Watchlist");
  const listPages = await fetchDomainListPages(listOfLists);
  if (listPages) {
    const lists = await fetchAndOrganizeDomainLists(listPages);
    if (lists) {
      // Merge the fetched ___domain sets into the global indicator sets.
      for (const type in indicators) {
        lists[type].list.forEach(indicators[type].list.add, indicators[type].list);
      }
    }
  }
  const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
  let noLinks = true;
  for (const container of entriesContainers) {
    const diffLink = container.querySelector('a.mw-changeslist-diff');
    const histLink = container.querySelector('a.mw-changeslist-history');
    const prevLink = container.querySelector(
      'a.mw-history-histlinks-previous');
    const curLink = container.querySelector('a.mw-history-histlinks-current');
    let revision = null;
    let urlParams = '';
    if (diffLink) {
      noLinks = false;
      const diffUrl = new URL(diffLink.href);
      urlParams = new URLSearchParams(diffUrl.search);
      const pageTitle = urlParams.get('title');
      if (isNotArticle(pageTitle)) continue;
      revision = {
        oldrevision: urlParams.get('diff'),
        newrevision: urlParams.get('oldid'),
        element: diffLink.parentNode.parentNode
      };
      // On user contributions pages the diff parameter is the literal "prev";
      // resolve it to the actual parent revision ID.
      if (revision.oldrevision === 'prev') {
        const previousRevisionMap = await fetchPreviousRevisionIds(
          [revision.newrevision]);
        revision.oldrevision = revision.newrevision;
        revision.newrevision = previousRevisionMap[revision.newrevision];
      }
    } else if (histLink) {
      noLinks = false;
      const histUrl = new URL(histLink.href);
      urlParams = new URLSearchParams(histUrl.search);
      const pageTitle = urlParams.get('title');
      if (isNotArticle(pageTitle)) continue;
      const firstID = await fetchFirstRevisionId(pageTitle);
      if (!firstID) continue;
      revision = {
        oldrevision: firstID,
        element: histLink.parentNode.parentNode
      };
    } else if (prevLink) {
      noLinks = false;
      urlParams = new URLSearchParams(prevLink.href);
      const previousRevisionMap = await fetchPreviousRevisionIds(
        [urlParams.get('oldid')]);
      revision = {
        oldrevision: urlParams.get('oldid'),
        newrevision: previousRevisionMap[urlParams.get('oldid')],
        element: prevLink.parentNode.parentNode
      };
    } else if (curLink) {
      noLinks = false;
      urlParams = new URLSearchParams(curLink.href);
      revision = {
        oldrevision: urlParams.get('oldid'),
        element: curLink.parentNode.parentNode
      };
    }
    if (revision) {
      await analyzeRevision(revision);
    }
  }
  // No diff/history links found (e.g. a single-revision history view):
  // analyze the page's first revision instead.
  if (noLinks) {
    const pageTitle = mw.config.get('wgTitle');
    const firstID = await fetchFirstRevisionId(pageTitle);
    // Bug fix: `revision` was previously assigned without declaration here,
    // creating an implicit global. Also guard against a missing first
    // revision and an empty container list before calling analyzeRevision.
    if (firstID && entriesContainers.length > 0) {
      const revision = {
        oldrevision: firstID,
        element: entriesContainers[0]
      };
      await analyzeRevision(revision);
    }
  }
}
/**
 * Analyzes a revision (or a pair of revisions) for newly added URLs,
 * compares them against ___domain watchlists, and highlights matches.
 *
 * @param {Object} revision - Object containing oldrevision, optional newrevision, and DOM element.
 */
async function analyzeRevision(revision) {
  const lookup = [revision.oldrevision];
  if (revision.newrevision) {
    lookup.push(revision.newrevision);
  }
  const wikiDomain = ___location.hostname;
  const cacheKey = `revisionDiff:${wikiDomain}:${revision.oldrevision}:${revision.newrevision || 'null'}`;
  const oneMonth = 30 * 24 * 60 * 60 * 1000;
  let addedURLs = [];
  let removedURLs = [];
  // Bug fix: track cache hits with an explicit flag. The previous code
  // inferred a miss from both URL arrays being empty, so revisions whose
  // diff contained no URLs were re-fetched on every page view even when a
  // valid cache entry existed.
  let cacheHit = false;
  const cached = localStorage.getItem(cacheKey);
  if (cached) {
    try {
      const parsed = JSON.parse(cached);
      const age = Date.now() - parsed.timestamp;
      if (age < oneMonth && Array.isArray(parsed.addedURLs) &&
        Array.isArray(parsed.removedURLs)) {
        console.log(`Cache hit for revision ${cacheKey}`);
        addedURLs = parsed.addedURLs;
        removedURLs = parsed.removedURLs;
        cacheHit = true;
      }
    } catch (e) {
      console.warn('Cache parse error, refetching:', e);
    }
  }
  // If not cached, fetch the revision wikitext and diff the URL sets.
  if (!cacheHit) {
    const wikitext = await fetchRevisionContent(lookup);
    const fromURLs = new Set(extractURLs(wikitext.oldrevision) || []);
    const toURLs = new Set(extractURLs(wikitext.newrevision) || []);
    if (revision.newrevision) {
      // URLs present in new revision but not in old revision = added
      addedURLs = [...toURLs].filter(url => !fromURLs.has(url));
      // URLs present in old revision but not in new revision = removed
      removedURLs = [...fromURLs].filter(url => !toURLs.has(url));
    } else {
      // For a first revision, all URLs are considered added
      addedURLs = Array.from(fromURLs);
      removedURLs = [];
    }
    try {
      localStorage.setItem(cacheKey, JSON.stringify({
        timestamp: Date.now(),
        addedURLs,
        removedURLs
      }));
    } catch (e) {
      console.warn('Failed to store cache:', e);
    }
  }
  console.log(`Revision element: ${revision.element.innerHTML}
Added URLs: ${addedURLs.join(' ')}
Removed URLs: ${removedURLs.join(' ')}
`);
  // One bucket of matched domains per indicator type.
  const matchedDomains = Object.keys(indicators).reduce((acc, key) => {
    acc[key] = [];
    return acc;
  }, {});
  // Removed URLs always get the "removed" indicator, regardless of lists.
  if (removedURLs.length > 0) {
    const removedDomains = [];
    for (const url of removedURLs) {
      try {
        const hostname = new URL(url).hostname;
        const ___domain = getRootDomain(hostname, publicSuffixSet);
        if (!removedDomains.includes(___domain)) {
          removedDomains.push(___domain);
        }
      } catch (e) {
        console.warn(`Error processing removed URL ${url}:`, e);
      }
    }
    matchedDomains.removed = removedDomains;
  }
  // Added URLs: assign each ___domain to its highest-priority matching list.
  for (const url of addedURLs) {
    try {
      const hostname = new URL(url).hostname;
      const ___domain = getRootDomain(hostname, publicSuffixSet);
      let highestPriorityType = null;
      for (const type in indicators) {
        if (type !== 'removed' && indicators[type].list.has(___domain)) {
          if (
            highestPriorityType === null ||
            indicators[type].priority > indicators[highestPriorityType].priority
          ) {
            highestPriorityType = type;
          }
        }
      }
      if (
        highestPriorityType !== null &&
        !matchedDomains[highestPriorityType].includes(___domain)
      ) {
        matchedDomains[highestPriorityType].push(___domain);
        // Drop this ___domain from any lower-priority buckets it entered earlier.
        for (const type in indicators) {
          if (
            type !== 'removed' && // never touch the "removed" bucket
            indicators[type].priority < indicators[highestPriorityType].priority
          ) {
            matchedDomains[type] = matchedDomains[type].filter(d => d !== ___domain);
          }
        }
      }
    } catch (e) {
      console.warn(`Error processing added URL ${url}:`, e);
    }
  }
  // Prepend one emoji per indicator type that matched; "removed" is shown
  // even alongside other indicators.
  for (const type in indicators) {
    if (matchedDomains[type] && matchedDomains[type].length > 0) {
      prependEmojiWithTooltip(revision.element, type, matchedDomains[type]);
    }
  }
}
/**
 * Prepends an emoji and tooltip to a revision list entry DOM element if any
 * domains matched a warning list. Idempotent per indicator type: a
 * `data-processed-<type>` attribute marks elements already decorated.
 *
 * @param {HTMLElement} element - The container element to prepend the emoji to.
 * @param {string} type - The type of indicator ('warning', 'caution', 'inspect').
 * @param {string[]} domains - The list of matched domains for the indicator.
 */
function prependEmojiWithTooltip(element, type, domains) {
  const indicator = indicators[type];
  const processedAttr = `data-processed-${type}`;
  // Skip unknown indicator types and elements already decorated for this type.
  if (!indicator || element.getAttribute(processedAttr) === 'true') {
    return;
  }
  const marker = document.createElement('span');
  marker.textContent = indicator.emoji + " ";
  marker.title = `${indicator.msg}: ${domains.join(", ")}`;
  element.parentNode.insertBefore(marker, element);
  element.setAttribute(processedAttr, 'true');
}
/**
 * Extracts the first page object from MediaWiki API query response.
 *
 * @param {Object} data - MediaWiki API response.
 * @returns {Object|null} The first page object or null if unavailable.
 */
async function getFirstPage(data) {
  if (!data || !data.query || !data.query.pages) {
    return null;
  }
  const [firstPage] = Object.values(data.query.pages);
  return firstPage;
}
/**
 * Retrieves the first revision from a page object.
 *
 * @param {Object} page - Page object containing revisions.
 * @returns {Object|null} First revision object or null.
 */
async function getFirstRevision(page) {
  const revs = page.revisions;
  return revs && revs.length > 0 ? revs[0] : null;
}
/**
 * Fetches wikitext content for one or two revisions by ID.
 *
 * @param {string[]} revIds - Array of revision IDs.
 * @returns {Object} Object with `oldrevision` and optionally `newrevision` as wikitext strings.
 */
async function fetchRevisionContent(revIds) {
  const data = await fetchRevisionData({
    revids: revIds,
    rvprop: ['content'],
    rvslots: ['main']
  });
  const page = await getFirstPage(data);
  const wikitext = { oldrevision: null, newrevision: null };
  // Bug fix: getFirstPage returns null when the API call fails or the
  // response has no pages; the old code crashed on `page.revisions`.
  if (page && page.revisions && page.revisions.length > 0) {
    wikitext.oldrevision = page.revisions[0].slots.main['*'] || null;
    if (page.revisions.length > 1) {
      wikitext.newrevision = page.revisions[1].slots.main['*'] || null;
    }
  }
  return wikitext;
}
/**
 * Fetches the parent revision IDs for a given list of revision IDs.
 *
 * @param {string[]} revisionIds - Array of revision IDs.
 * @returns {Object} Map of revision ID to its parent ID.
 */
async function fetchPreviousRevisionIds(revisionIds) {
  const data = await fetchRevisionData({
    revids: revisionIds,
    rvprop: ['ids']
  });
  const page = await getFirstPage(data);
  // Bug fix: also guard against a page without revisions (e.g. a bad
  // revision ID); the old code crashed iterating `page.revisions`.
  if (!page || !page.revisions) return {};
  const revisionMap = {};
  for (const revision of page.revisions) {
    revisionMap[revision.revid] = revision.parentid;
  }
  return revisionMap;
}
/**
 * Fetches the ID of the first (oldest) revision of a page.
 *
 * @param {string} pageTitle - The page title to look up.
 * @returns {number|null} Revision ID or null.
 */
async function fetchFirstRevisionId(pageTitle) {
  // rvdir 'newer' with limit 1 yields the page's earliest revision.
  const data = await fetchRevisionData({
    titles: [pageTitle],
    rvlimit: 1,
    rvdir: 'newer',
    rvprop: ['ids'],
  });
  const page = await getFirstPage(data);
  if (!page) {
    return null;
  }
  const firstRevision = await getFirstRevision(page);
  if (!firstRevision) {
    return null;
  }
  return firstRevision.revid;
}
/**
 * Fetches the list of subpages from the list of lists, parses wikilinks, caches
 * the result, and returns list of subpage titles.
 *
 * @param {string} pageName - Title of the list-of-lists page.
 * @returns {Promise<string[]>} List of subpage titles.
 */
async function fetchDomainListPages(pageName) {
  const cacheKey = `citationWatchlistFetchDomainListPages_${pageName}`;
  const cacheExpiration = 4 * 60 * 60 * 1000; // 4 hours
  const now = Date.now();
  const cachedData = localStorage.getItem(cacheKey);
  const cachedTimestamp = localStorage.getItem(`${cacheKey}_timestamp`);
  if (cachedData && cachedTimestamp && (now - parseInt(cachedTimestamp, 10)) <
    cacheExpiration) {
    console.log("Loaded list of lists from cache");
    return JSON.parse(cachedData);
  }
  const data = await fetchRevisionData({
    titles: [pageName],
    rvprop: ['content'],
    rvslots: ['*']
  });
  const page = await getFirstPage(data);
  // Bug fix: a missing page has no revisions array; the old code crashed on
  // `page.revisions[0]` in that case.
  if (!page || !page.revisions || !page.revisions[0]) return [];
  const content = page.revisions[0].slots.main['*'];
  const pageTitles = [];
  for (const line of content.split('\n')) {
    // Entries look like "* [[Page Title]]" (space after the asterisk).
    if (line.startsWith('* [[')) {
      const match = line.match(
        /\[\[([^\]]+)\]\]/); // Matches the first instance of [[Page Title]]
      if (match) {
        pageTitles.push(match[1]);
      }
    }
  }
  localStorage.setItem(cacheKey, JSON.stringify(pageTitles));
  localStorage.setItem(`${cacheKey}_timestamp`, now.toString());
  console.log("Loaded from API and stored in cache");
  return pageTitles;
}
/**
 * Loads ___domain lists from a set of pages, categorizes them by indicator section
 * headers, and populates the corresponding `Set` in the global `indicators` object.
 *
 * @param {string[]} pageNames - List of page titles to fetch.
 * @returns {Object} Updated indicators object with ___domain sets.
 */
async function fetchAndOrganizeDomainLists(pageNames) {
  const cacheTTL = 6 * 60 * 60 * 1000; // 6 hours
  const now = Date.now();
  const cachedData = {};
  const pagesToFetch = [];
  // Serve what we can from localStorage; queue the rest for one API call.
  for (const title of pageNames) {
    const cacheKey = `domainList:${___location.hostname}:${title}`;
    const cached = localStorage.getItem(cacheKey);
    if (cached) {
      try {
        const parsed = JSON.parse(cached);
        if (now - parsed.timestamp < cacheTTL && parsed.content) {
          console.log(`Using cached content for page: ${title}`);
          cachedData[title] = parsed.content;
          continue;
        } else {
          console.log(`Cache expired for page: ${title}`);
        }
      } catch (e) {
        console.warn(`Cache error for ${title}:`, e);
      }
    }
    console.log(`Will fetch page: ${title}`);
    pagesToFetch.push(title);
  }
  const fetchedPages = {};
  if (pagesToFetch.length > 0) {
    const apiData = await fetchRevisionData({
      titles: pagesToFetch,
      rvprop: ['content'],
      rvslots: ['*'],
    });
    // Bug fix: fetchRevisionData returns null when all retries fail; the old
    // code crashed on `apiData.query` in that case.
    const pages = apiData && apiData.query ? apiData.query.pages : null;
    if (pages) {
      for (const pageId in pages) {
        const page = pages[pageId];
        // Bug fix: missing/empty pages carry no revisions; skip them instead
        // of crashing on `page.revisions[0]`.
        if (!page.revisions || !page.revisions[0]) continue;
        const title = page.title;
        const content = page.revisions[0].slots.main['*'];
        fetchedPages[title] = content;
        const cacheKey = `domainList:${___location.hostname}:${title}`;
        try {
          localStorage.setItem(cacheKey, JSON.stringify({
            timestamp: now,
            content,
          }));
          console.log(`Cached content for page: ${title}`);
        } catch (e) {
          console.warn(`Failed to cache ${title}:`, e);
        }
      }
    }
  }
  // Parse each page: a section header selects the target indicator set, and
  // each subsequent "* ___domain" bullet is added to it.
  const allContent = { ...cachedData, ...fetchedPages };
  for (const title in allContent) {
    let currentList = null;
    for (const line of allContent[title].split('\n')) {
      for (const type in indicators) {
        if (line.trim() === indicators[type].section) {
          currentList = indicators[type].list;
          break;
        }
      }
      if (line.startsWith('*') && currentList) {
        const ___domain = line.substring(1).trim();
        // Discard ___domain entries with slashes (indicating domains with paths)
        if (!___domain.includes('/')) {
          currentList.add(___domain);
        }
      }
    }
  }
  return indicators;
}
/**
 * Fetches and caches the public suffix list used to identify top-level domains.
 *
 * @returns {Promise<Set<string>>} Set of public suffixes.
 */
async function fetchPublicSuffixList() {
  const cacheKey = 'publicSuffixListCache';
  const cacheTTL = 24 * 60 * 60 * 1000; // 24 hours
  // Parses raw PSL text into a Set, dropping blank lines and // comments.
  // (Previously this logic was duplicated in the cache path and fetch path.)
  const parseSuffixes = text => new Set(
    text.split('\n')
      .map(line => line.trim())
      .filter(line => line && !line.startsWith('//'))
  );
  const cached = localStorage.getItem(cacheKey);
  if (cached) {
    try {
      const parsed = JSON.parse(cached);
      const age = Date.now() - parsed.timestamp;
      if (age < cacheTTL && parsed.content) {
        console.log('Using cached public suffix list');
        return parseSuffixes(parsed.content);
      }
    } catch (e) {
      console.warn('Error parsing cache, refetching:', e);
    }
  }
  // Fetch the raw wikitext of the locally hosted PSL copy.
  const pslUrl = mw.config.get('wgArticlePath').replace('$1', publicSuffixList)
    + '?action=raw';
  console.log(`Raw page text request: ${pslUrl}`);
  const response = await safeFetch(fetch, pslUrl);
  const content = response ? await response.text() : null;
  if (!content) return new Set();
  try {
    localStorage.setItem(cacheKey, JSON.stringify({
      timestamp: Date.now(),
      content
    }));
  } catch (e) {
    console.warn('Failed to write to cache:', e);
  }
  return parseSuffixes(content);
}
/**
 * Makes a MediaWiki API call to fetch revision metadata or content.
 *
 * @param {Object} data - Options for the API call, such as `revids`, `titles`, `rvprop`, etc.
 * @returns {Promise<Object>} MediaWiki API query result.
 */
async function fetchRevisionData(data) {
  const params = {
    action: 'query',
    prop: 'revisions',
    format: 'json',
    rvdir: data.rvdir || 'older',
    origin: '*'
  };
  if (data.rvlimit) {
    params.rvlimit = data.rvlimit;
  }
  // Array-valued options are joined with '|' per the MediaWiki API convention.
  for (const key of ['rvprop', 'revids', 'titles', 'rvslots']) {
    const value = data[key];
    if (value) {
      params[key] = Array.isArray(value) ? value.join('|') : value;
    }
  }
  const api = new mw.Api();
  return await safeFetch(api.get.bind(api), params);
}
/**
 * Wraps any asynchronous fetch function with retry logic and error handling.
 *
 * @param {Function} fn - The function to execute (usually an API call).
 * @param {...any} args - Arguments to pass to the fetch function.
 * @param {Object} options - Optional trailing options object (created via
 *   safeFetch.withOptions), recognized by its `_isSafeFetchOptions` marker.
 * @param {number} options.retries - Number of retry attempts (default: 2).
 * @param {number} options.retryDelay - Delay between retries in ms (default: 1000).
 * @returns {Promise<any|null>} Result of the fetch or null on failure.
 */
async function safeFetch(fn, ...args) {
  let settings = { retries: 2, retryDelay: 1000 };
  // A trailing object flagged with _isSafeFetchOptions overrides the defaults.
  if (args.length > 0 && typeof args[args.length - 1] === 'object' &&
    args[args.length - 1]._isSafeFetchOptions) {
    settings = { ...settings, ...args.pop() };
  }
  const maxAttempts = settings.retries + 1;
  const label = fn.name || 'fetch operation';
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const result = await fn(...args);
      // A null/undefined result counts as a failure worth retrying.
      if (result === null || result === undefined) {
        throw new Error('Received null or undefined response');
      }
      // Fetch-API style responses expose a boolean `ok`; treat !ok as failure.
      if (typeof result.ok === 'boolean' && !result.ok) {
        throw new Error(`HTTP error ${result.status}: ${result.statusText || 'Unknown error'}`);
      }
      return result;
    } catch (error) {
      if (attempt < maxAttempts) {
        console.warn(`Error during ${label} (attempt ${attempt}/${maxAttempts}):`,
          error.message || error);
        // Pause before the next attempt.
        await new Promise(resolve => setTimeout(resolve, settings.retryDelay));
      } else {
        console.error(`All ${maxAttempts} attempts failed for ${label}:`,
          error.message || error);
      }
    }
  }
  // Every attempt failed.
  return null;
}
/**
 * Builds a trailing-options object for safeFetch.
 *
 * @param {number} [retries=2] - Number of retry attempts; 0 is a valid value.
 * @param {number} [retryDelay=1000] - Delay between retries in ms; 0 is valid.
 * @returns {Object} Options object tagged with `_isSafeFetchOptions`.
 */
safeFetch.withOptions = function(retries, retryDelay) {
  return {
    // Bug fix: the old `retries || 2` / `retryDelay || 1000` defaults
    // silently replaced the legitimate value 0 with the default.
    retries: typeof retries === 'number' ? retries : 2,
    retryDelay: typeof retryDelay === 'number' ? retryDelay : 1000,
    _isSafeFetchOptions: true
  };
};
/**
 * Extracts all HTTP(S) URLs from a given wikitext string, normalized via the
 * URL constructor and deduplicated in first-seen order.
 *
 * @param {string} wikitext - Raw wikitext revision content.
 * @returns {string[]} List of valid extracted URLs.
 */
function extractURLs(wikitext) {
  const urls = [];
  if (!wikitext) return urls;
  const urlRegex = /https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*)/g;
  // Set-based dedupe: the old `urls.includes` check inside the loop was
  // O(n) per URL, making extraction O(n^2) on link-heavy revisions.
  const seen = new Set();
  let match;
  while ((match = urlRegex.exec(wikitext)) !== null) {
    try {
      // Use URL constructor for validation and normalization
      const url = new URL(match[0]);
      if (!seen.has(url.href)) {
        seen.add(url.href);
        urls.push(url.href);
      }
    } catch (error) {
      console.error(`Invalid URL rejected: ${match[0]}`, error.message);
    }
  }
  return urls;
}
/**
 * Extracts the top-level ___domain from a full hostname using a public suffix set.
 *
 * @param {string} hostname - Full hostname (e.g. sub.example.co.uk).
 * @param {Set<string>} publicSuffixSet - Set of known public suffixes.
 * @returns {string} The top-level ___domain (e.g. example.co.uk).
 */
function getRootDomain(hostname, publicSuffixSet) {
  // Reject empty or non-string hostnames.
  if (!hostname || typeof hostname !== 'string') {
    console.warn('Invalid hostname provided to getRootDomain:', hostname);
    return '';
  }
  // IPv4 addresses pass through unchanged.
  if (/^(\d{1,3}\.){3}\d{1,3}$/.test(hostname)) {
    return hostname;
  }
  const parts = hostname.split('.');
  // Single-label hostnames (e.g. "localhost") pass through unchanged.
  if (parts.length < 2) {
    return hostname;
  }
  // Walk left to right, testing progressively shorter candidate suffixes.
  // NOTE(review): exception entries ("!suffix") are treated the same as
  // plain suffixes here, which may not match full PSL semantics — preserved
  // as-is from the original behavior.
  for (let start = 0; start < parts.length; start++) {
    const candidate = parts.slice(start).join('.');
    if (!publicSuffixSet.has(candidate) && !publicSuffixSet.has(`!${candidate}`)) {
      continue;
    }
    // Found a suffix: keep one label to its left, unless the whole hostname
    // is itself a public suffix.
    return start > 0 ? parts.slice(start - 1).join('.') : hostname;
  }
  // No suffix match: fall back to the last two labels.
  return parts.length === 2 ? hostname : parts.slice(-2).join('.');
}
/**
 * Determines whether a given page title does *not* belong to the main or draft namespaces.
 *
 * @param {string|null} pageTitle - The title of the page (may be null when the
 *   URL carried no title parameter).
 * @returns {boolean} True if not an article namespace.
 */
function isNotArticle(pageTitle) {
  // Bug fix: callers pass urlParams.get('title'), which can be null; the old
  // code threw a TypeError on .startsWith. A missing title is not an article.
  if (!pageTitle) {
    return true;
  }
  return namespaces.some(namespace => pageTitle.startsWith(namespace));
}
/**
 * Cleans up expired localStorage cache entries based on known cache key prefixes and TTLs.
 */
function purgeExpiredCache() {
  const now = Date.now();
  // Define cache configurations with their TTLs in milliseconds
  const knownCaches = [
    { prefix: 'revisionDiff:', ttl: 30 * 24 * 60 * 60 * 1000, description: 'Revision diff cache' },
    { prefix: 'domainList:', ttl: 6 * 60 * 60 * 1000, description: 'Domain list cache' },
    { prefix: 'publicSuffixListCache', ttl: 24 * 60 * 60 * 1000, description: 'Public suffix list cache' },
    { prefix: 'citationWatchlistFetchDomainListPages_', ttl: 4 * 60 * 60 * 1000, description: 'Domain list pages cache' }
  ];
  // Track statistics for logging
  const stats = { checked: 0, expired: 0, errors: 0 };
  try {
    // Bug fix: snapshot all keys before removing anything. The old code
    // removed items while iterating localStorage by index, which shifts the
    // key order and silently skips the entry following each removal.
    const keys = [];
    for (let i = 0; i < localStorage.length; i++) {
      const key = localStorage.key(i);
      if (key) keys.push(key);
    }
    for (const key of keys) {
      // Each key can match at most one cache prefix.
      const cache = knownCaches.find(c => key.startsWith(c.prefix));
      if (!cache) continue;
      stats.checked++;
      try {
        if (key.endsWith('_timestamp')) {
          // Handle paired key-timestamp entries
          const baseKey = key.replace(/_timestamp$/, '');
          const timestampStr = localStorage.getItem(key);
          if (!timestampStr) {
            // Orphaned timestamp key without a value
            localStorage.removeItem(key);
            console.log(`Removed orphaned timestamp key: ${key}`);
            stats.expired++;
            continue;
          }
          const timestamp = parseInt(timestampStr, 10);
          if (isNaN(timestamp) || now - timestamp > cache.ttl) {
            // Expired or invalid timestamp
            localStorage.removeItem(key);
            // Also remove the base key if it exists
            if (localStorage.getItem(baseKey) !== null) {
              localStorage.removeItem(baseKey);
              console.log(`Purged expired ${cache.description}: ${baseKey}`);
            } else {
              console.log(`Removed orphaned timestamp for missing key: ${baseKey}`);
            }
            stats.expired++;
          }
        } else {
          // Handle JSON entries with embedded timestamps
          const value = localStorage.getItem(key);
          if (!value) {
            // Empty value (or already removed alongside its timestamp above)
            localStorage.removeItem(key);
            console.log(`Removed empty cache entry: ${key}`);
            stats.expired++;
            continue;
          }
          try {
            const parsed = JSON.parse(value);
            if (parsed && parsed.timestamp && now - parsed.timestamp > cache.ttl) {
              // Expired based on embedded timestamp
              localStorage.removeItem(key);
              console.log(`Purged expired ${cache.description}: ${key}`);
              stats.expired++;
            }
          } catch (jsonError) {
            // Invalid JSON, remove the entry
            localStorage.removeItem(key);
            console.warn(`Removed invalid JSON cache entry: ${key}`, jsonError.message);
            stats.errors++;
            stats.expired++;
          }
        }
      } catch (itemError) {
        console.warn(`Error processing cache item ${key}:`, itemError.message);
        stats.errors++;
        // Try to remove problematic entries
        try {
          localStorage.removeItem(key);
          console.log(`Removed problematic cache entry: ${key}`);
          stats.expired++;
        } catch (removeError) {
          console.error(`Failed to remove problematic entry ${key}:`, removeError.message);
        }
      }
    }
    // Log summary statistics
    if (stats.checked > 0) {
      console.log(`Cache cleanup complete: checked ${stats.checked} items, removed ${stats.expired} expired items, encountered ${stats.errors} errors`);
    }
  } catch (globalError) {
    console.error('Fatal error during cache cleanup:', globalError.message);
  }
}
// Kick off the analysis. A .catch is attached so a failure surfaces as a
// console error instead of an unhandled promise rejection.
analyzeView()
  .then(() => console.log('Citation Watchlist script finished executing'))
  .catch(error => console.error('Citation Watchlist failed:', error));