Content deleted Content added
Polygnotus (talk | contribs) No edit summary |
Polygnotus (talk | contribs) v2 |
||
Line 1:
// Wikipedia Category Items Copier - Improved Version
// This script adds three buttons to Wikipedia category pages:
// 1. "Copy Items" - Copies all items in the current category via API
Line 6:
// Throttle interval for API calls, expressed in milliseconds.
const API_DELAY = 500; // Delay between API requests in milliseconds
// Retry ceiling; this value is shown in the "retry n/MAX_RETRIES" status message
// and in the "Request failed after ... retries" error text.
const MAX_RETRIES = 3; // Maximum number of retries for failed requests
// Only run on Wikipedia category pages
Line 20 ⟶ 21:
container.style.border = '1px solid #a2a9b1';
container.style.borderRadius = '3px';
/**
 * Attach a native browser tooltip to an element.
 *
 * Writes `text` to the element's `title` property and then sets the
 * element's inline `position` style to `relative`.
 *
 * @param {HTMLElement} element - Element that receives the tooltip.
 * @param {string} text - Tooltip text to show on hover.
 * @returns {void}
 */
function addTooltip(element, text) {
    element.title = text;
    Object.assign(element.style, { position: 'relative' });
}
// Create the "Copy Items" button
Line 27 ⟶ 34:
copyItemsBtn.style.padding = '8px 12px';
copyItemsBtn.style.cursor = 'pointer';
addTooltip(copyItemsBtn, 'Copy all articles and pages from this category only (not subcategories). Fast operation.');
// Create the "Copy All Items" button
Line 34 ⟶ 42:
copyAllItemsBtn.style.padding = '8px 12px';
copyAllItemsBtn.style.cursor = 'pointer';
addTooltip(copyAllItemsBtn, 'Copy all articles and pages from this category AND all its subcategories. May take several minutes for large category trees.');
// Create the "Copy Subcategories" button
Line 40 ⟶ 49:
copySubcatsBtn.style.padding = '8px 12px';
copySubcatsBtn.style.cursor = 'pointer';
addTooltip(copySubcatsBtn, 'Copy only the category names (not articles) from this category tree. Useful for exploring category structure.');
// URL export toggle: a checkbox and the label bound to it through the shared id
const urlCheckbox = Object.assign(document.createElement('input'), {
    type: 'checkbox',
    id: 'includeUrls',
});
urlCheckbox.style.marginLeft = '15px';

// The label targets the checkbox via htmlFor and carries the explanatory tooltip
const urlLabel = Object.assign(document.createElement('label'), {
    htmlFor: 'includeUrls',
    textContent: 'Include URLs',
});
urlLabel.style.marginLeft = '5px';
addTooltip(urlLabel, 'Include full Wikipedia URLs for each item in the export');
// Create status text
Line 50 ⟶ 72:
container.appendChild(copyAllItemsBtn);
container.appendChild(copySubcatsBtn);
container.appendChild(urlCheckbox);
container.appendChild(urlLabel);
container.appendChild(statusText);
Line 58 ⟶ 82:
} else {
document.querySelector('#content').prepend(container);
}
// Global visited set to prevent visiting any page more than once across all operations.
// Entries are "Category:"-prefixed titles: the recursive traversals test membership
// with globalVisited.has(...) before expanding a category, and the recursive copy
// handlers call globalVisited.clear() before starting a new run.
const globalVisited = new Set();
/**
 * Render a list of page titles as clipboard/export text.
 *
 * Without URLs, the titles are joined one per line. With URLs, each title is
 * followed on the next line by its en.wikipedia.org link (spaces become
 * underscores, then the title is percent-encoded), and entries are separated
 * by a blank line.
 *
 * @param {string[]} items - Page titles to format.
 * @param {boolean} includeUrls - Whether to append a URL line to every title.
 * @returns {string} The formatted text; an empty string for an empty list.
 */
function formatItems(items, includeUrls) {
    const withUrl = (title) => {
        const slug = encodeURIComponent(title.replace(/ /g, '_'));
        return `${title}\nhttps://en.wikipedia.org/wiki/${slug}`;
    };

    return includeUrls
        ? items.map(withUrl).join('\n\n')
        : items.join('\n');
}
Line 164 ⟶ 203:
}
// Enhanced API request function with retry logic, rate limiting, and maxlag handling
async function
try {
const response = await fetch(url);
//
if (
const waitTime = Math.pow(2, retryCount) * 1000; // Exponential backoff
statusText.innerHTML += `<br>Rate limited or server error, waiting ${waitTime/1000}s before retry ${retryCount + 1}/${MAX_RETRIES}...`;
await new Promise(resolve => setTimeout(resolve, waitTime));
return makeApiRequest(url, retryCount + 1);
} else {
throw new Error(`Request failed after ${MAX_RETRIES} retries: ${response.status}`);
}
}
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const data = await response.json();
// Handle maxlag errors - these don't count as retries since they're not real failures
if (data.error &&
const lagTime = data.error.lag || 5; // Default to 5 seconds if lag not specified
const waitTime = (lagTime + 2) * 1000; // Add 2 second buffer
statusText.innerHTML += `<br>Database lagged (${lagTime}s), waiting ${waitTime/1000}s before retry...`;
await new Promise(resolve => setTimeout(resolve, waitTime));
return makeApiRequest(url, retryCount); // Don't increment retry count for maxlag
}
//
throw new Error(`API Error: ${data.error.code} - ${data.error.info}`);
}
return data;
} catch (error) {
statusText.
await new Promise(resolve => setTimeout(resolve, 1000));
return makeApiRequest(url, retryCount + 1);
} else {
throw error;
}
}
}
// Function to get all subcategories of a category
async function getSubcategories(categoryTitle, continueToken = null) {
try {
// Base API URL for subcategories (only get items with namespace 14, which is Category)
// Add maxlag parameter to be respectful of server load
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=14&cmlimit=max&maxlag=5&format=json&origin=*`;
// Add continue token if provided
Line 212 ⟶ 269:
statusText.textContent = `Fetching subcategories for: ${categoryTitle}...`;
if (!data.query || !data.query.categorymembers) {
Line 222 ⟶ 276:
}
// Extract subcategories and continue token, prefix with "Category:"
const subcategories = data.query.categorymembers.map(member => member.title
const nextContinueToken = data.continue ? data.continue.cmcontinue : null;
Line 229 ⟶ 283:
} catch (error) {
console.error("API request error:", error);
statusText.
return { subcategories: [], continueToken: null };
}
Line 238 ⟶ 292:
try {
// Base API URL for non-category members (exclude namespace 14, which is Category)
// Add maxlag parameter to be respectful of server load
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=0|1|2|3|4|5|6|7|8|9|10|11|12|13|15&cmlimit=max&maxlag=5&format=json&origin=*`;
// Add continue token if provided
Line 247 ⟶ 302:
statusText.textContent = `Fetching items for: ${categoryTitle}...`;
if (!data.query || !data.query.categorymembers) {
Line 264 ⟶ 316:
} catch (error) {
console.error("API request error:", error);
statusText.
return { members: [], continueToken: null };
}
Line 283 ⟶ 335:
statusText.innerHTML = `Retrieved ${allMembers.length} items from "${categoryTitle}" (page ${pagesProcessed})...`;
} while (continueToken);
Line 304 ⟶ 352:
pagesProcessed++;
} while (continueToken);
Line 313 ⟶ 357:
}
// Function to recursively get all subcategories with circular reference detection
async function getAllSubcategoriesRecursive(categoryTitle
const allSubcategories = [];
const queue = [`Category:${categoryTitle}`]; // Start with prefixed category
const
// Skip if already visited (circular reference detection)
if (visited.has(currentCategory) || globalVisited.has(currentCategory)) {
continue;
visited.add(currentCategory);
globalVisited.add(currentCategory);
statusText.innerHTML = `Exploring subcategories (found ${allSubcategories.length} categories, queue: ${queue.length})...`;
// Get direct subcategories (remove "Category:" prefix for API call)
const categoryNameForApi = currentCategory.replace('Category:', '');
const directSubcategories = await getAllSubcategories(categoryNameForApi);
// Add new subcategories to results and queue
for (const subcategory of directSubcategories) {
if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
allSubcategories.push(subcategory);
queue.push(subcategory);
}
}
}
Line 340 ⟶ 393:
// Function to recursively get all items from a category and all its subcategories
async function getAllItemsRecursive(categoryTitle
const visited = new Set();
const allItems = [];
const queue = [categoryTitle]; // Start without prefix for consistency
while (queue.length > 0) {
const currentCategory = queue.shift();
const categoryKey = `Category:${currentCategory}`;
// Skip if already visited (circular reference detection)
if (visited.has(categoryKey) || globalVisited.has(categoryKey)) {
continue;
}
visited.add(categoryKey);
totalCategories++;
statusText.innerHTML = `Getting items from "${currentCategory}" (processed ${totalCategories} categories, found ${allItems.length} items, queue: ${queue.length})...`;
// Get items from current category
// Get direct subcategories and add to queue
const directSubcategories = await getAllSubcategories(currentCategory);
for (const subcategory of directSubcategories) {
if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
// Remove "Category:" prefix for queue consistency
const subcategoryName = subcategory.replace('Category:', '');
queue.push(subcategoryName);
}
}
}
return { items: allItems, totalCategories };
}
Line 382 ⟶ 444:
}
const
const formattedText = formatItems(items, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName);
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${items.length} items to clipboard.`;
Line 397 ⟶ 462:
try {
globalVisited.clear();
// Get all items recursively
const { items: allItems, totalCategories } = await getAllItemsRecursive(categoryName
// Deduplicate items
Line 411 ⟶ 476:
}
const includeUrls = urlCheckbox.checked;
const formattedText = formatItems(uniqueItems, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_all_recursive');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${uniqueItems.length} unique items to clipboard from ${
}
} catch (error) {
Line 426 ⟶ 494:
try {
globalVisited.clear();
const allSubcategories = await getAllSubcategoriesRecursive(categoryName);
// Deduplicate subcategories
Line 437 ⟶ 507:
}
const includeUrls = urlCheckbox.checked;
const formattedText = formatItems(uniqueSubcategories, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_subcategories');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${uniqueSubcategories.length} unique subcategories to clipboard.`;
|