User:Polygnotus/Scripts/CategoryToClipboard.js: Difference between revisions

Content deleted Content added
No edit summary
add "both" and "both recursive" buttons, re-order
 
(16 intermediate revisions by the same user not shown)
Line 1:
// Wikipedia Category Items Copier - Fixed Redirect Filtering
 
// This script adds three buttons to Wikipedia category pages:
const API_DELAY = 500; // Delay between API requests in milliseconds
// 1. "Copy Items" - Copies all items in the current category via API
const MAX_RETRIES = 3; // Maximum number of retries for failed requests
// 2. "Copy All Items" - Copies all items in the current category and its subcategories via API
// 3. "Copy Subcategories" - Copies all subcategories recursively (no items)
 
// Only run on Wikipedia category pages
Line 18 ⟶ 17:
container.style.border = '1px solid #a2a9b1';
container.style.borderRadius = '3px';
 
/**
 * Give an element a native browser tooltip.
 *
 * @param {HTMLElement} element - Element that receives the tooltip.
 * @param {string} text - Tooltip text, stored in the element's `title`.
 */
function addTooltip(element, text) {
    Object.assign(element, { title: text });
    // The element is also made relatively positioned, as every control in the toolbar is.
    Object.assign(element.style, { position: 'relative' });
}
 
/**
 * Build one toolbar button with the styling shared by every copy button.
 *
 * @param {string} label - Visible button text.
 * @param {string} tooltip - Hover text attached through addTooltip().
 * @returns {HTMLButtonElement} The configured, not yet attached, button.
 */
function createCopyButton(label, tooltip) {
    const button = document.createElement('button');
    button.textContent = label;
    button.style.marginRight = '10px';
    button.style.padding = '8px 12px';
    button.style.cursor = 'pointer';
    addTooltip(button, tooltip);
    return button;
}

// Items of the current category only.
const copyItemsBtn = createCopyButton(
    'Copy items',
    'Copy all items in this category. Not recursive.'
);
// Items of the current category and of every subcategory below it.
const copyAllItemsBtn = createCopyButton(
    'Copy items recursively',
    'Copy all items in this category AND all items in its subcategories.'
);
// Direct subcategories of the current category only.
const copyDirectSubcatsBtn = createCopyButton(
    'Copy subcats',
    'Copy all subcategories of this category. Not recursive.'
);
// Subcategories of the current category and of every subcategory below it.
const copySubcatsBtn = createCopyButton(
    'Copy subcategories recursively',
    'Copy all subcategories of this category and its subcategories.'
);
// Items and direct subcategories of the current category only.
const copyBothBtn = createCopyButton(
    'Copy both',
    'Copy all items and subcategories from this category. Not recursive.'
);
// Items and subcategories of the current category and everything below it.
const copyBothRecursiveBtn = createCopyButton(
    'Copy both recursively',
    'Copy all items and subcategories from this category and all its subcategories.'
);

// Checkbox that switches the export from page titles to full URLs.
const urlCheckbox = document.createElement('input');
urlCheckbox.type = 'checkbox';
urlCheckbox.id = 'includeUrls';
urlCheckbox.style.marginLeft = '15px';
// Label tied to the checkbox through its id, so clicking the text toggles it.
const urlLabel = document.createElement('label');
urlLabel.htmlFor = 'includeUrls';
urlLabel.textContent = 'Whole URLs';
urlLabel.style.marginLeft = '5px';
addTooltip(urlLabel, 'Include full Wikipedia URLs for each item in the export');
 
// Create status text
Line 44 ⟶ 91:
statusText.style.color = '#555';
// Attach the controls to the toolbar in display order: the six copy buttons,
// then the URL checkbox with its label, and finally the status line.
const toolbarControls = [
    copyItemsBtn,
    copyAllItemsBtn,
    copyDirectSubcatsBtn,
    copySubcatsBtn,
    copyBothBtn,
    copyBothRecursiveBtn,
    urlCheckbox,
    urlLabel,
    statusText,
];
for (const control of toolbarControls) {
    container.appendChild(control);
}
Line 56 ⟶ 108:
} else {
document.querySelector('#content').prepend(container);
}
 
// Set of "Category:"-prefixed titles that a recursive traversal has already
// expanded. The recursive helpers both consult and extend it, so a category
// reached along several paths (or through a cycle) is processed only once.
// It is not permanent state: the recursive button handlers call
// globalVisited.clear() before starting a new traversal.
const globalVisited = new Set();
 
/**
 * Turn a list of page titles into newline-separated export text.
 *
 * @param {string[]} items - Page titles, e.g. "Some article" or "Category:Foo".
 * @param {boolean} includeUrls - When truthy, each line is the page's full
 *   en.wikipedia.org URL instead of its title (the title itself is not emitted).
 * @returns {string} One entry per line; an empty string for an empty list.
 */
function formatItems(items, includeUrls) {
    // Spaces become underscores before percent-encoding, as in wiki page URLs.
    const toUrl = (title) =>
        `https://en.wikipedia.org/wiki/${encodeURIComponent(title.replace(/ /g, '_'))}`;
    const lines = includeUrls ? items.map((title) => toUrl(title)) : items;
    return lines.join('\n');
}
 
Line 88 ⟶ 156:
// If clipboard fails, offer download instead
const filename = `${categoryName.replace(/[^a-z0-9]/gi, '_')}-items.txt`;
// Clear the status text completely and show only the clipboard failure message
statusText.innerHTML = `<p>Clipboard access failed. Click the link below to download items:</p>`;
offerTextAsDownload(text, filename);
statusText.innerHTML = `<p>Clipboard access failed. Click the link below to download items:</p>` + statusText.innerHTML;
resolve(false);
}
Line 162 ⟶ 231:
}
 
/**
 * Fetch an API URL and return its parsed JSON body, pacing requests and
 * retrying failures.
 *
 * Behaviour per attempt:
 *  - waits API_DELAY milliseconds before sending the request;
 *  - HTTP 429 and 5xx responses are retried with exponential backoff
 *    (1s, 2s, 4s, ...) until MAX_RETRIES retries have been used;
 *  - an API error with code "maxlag" waits for the reported lag plus two
 *    seconds and retries WITHOUT consuming a retry;
 *  - anything else that throws during the attempt (network failure, other
 *    non-OK status, invalid JSON, any other API error) is retried after one
 *    second, again until MAX_RETRIES retries have been used.
 * Progress messages are appended to statusText.
 *
 * @param {string} url - Fully built API request URL.
 * @param {number} [retryCount=0] - Retries already used; callers normally omit it.
 * @returns {Promise<Object>} The decoded JSON response.
 * @throws {Error} The last failure once MAX_RETRIES retries are exhausted.
 */
async function makeApiRequest(url, retryCount = 0) {
    const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));

    try {
        // Pace every request so the API is not hammered.
        await sleep(API_DELAY);

        const response = await fetch(url);

        // Handle rate limiting (HTTP 429) or server errors (5xx)
        if (response.status === 429 || response.status >= 500) {
            if (retryCount < MAX_RETRIES) {
                const waitTime = Math.pow(2, retryCount) * 1000; // Exponential backoff
                statusText.innerHTML += `<br>Rate limited or server error, waiting ${waitTime/1000}s before retry ${retryCount + 1}/${MAX_RETRIES}...`;
                await sleep(waitTime);
                // Returned without `await` on purpose: a rejection of the nested
                // call passes straight to the caller instead of re-entering the
                // catch block below and being retried a second time.
                return makeApiRequest(url, retryCount + 1);
            } else {
                throw new Error(`Request failed after ${MAX_RETRIES} retries: ${response.status}`);
            }
        }

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        const data = await response.json();

        // Handle maxlag errors - these don't count as retries since they're not real failures
        if (data.error && data.error.code === 'maxlag') {
            const lagTime = data.error.lag || 5; // Default to 5 seconds if lag not specified
            const waitTime = (lagTime + 2) * 1000; // Add 2 second buffer
            statusText.innerHTML += `<br>Database lagged (${lagTime}s), waiting ${waitTime/1000}s before retry...`;
            await sleep(waitTime);
            return makeApiRequest(url, retryCount); // Don't increment retry count for maxlag
        }

        // Handle other API errors
        if (data.error) {
            throw new Error(`API Error: ${data.error.code} - ${data.error.info}`);
        }

        return data;
    } catch (error) {
        if (retryCount < MAX_RETRIES) {
            statusText.innerHTML += `<br>Request failed, retrying ${retryCount + 1}/${MAX_RETRIES}...`;
            await sleep(1000);
            return makeApiRequest(url, retryCount + 1);
        } else {
            throw error;
        }
    }
}
 
 
 
// Function to get all subcategories of a category
async function getSubcategories(categoryTitle, continueToken = null) {
try {
// Base API URL for subcategories (only get items with namespace 14, which is Category)
// Add maxlag parameter to be respectful of server load
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=14&cmlimit=500&format=json&origin=*`;
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=14&cmlimit=max&maxlag=5&format=json&origin=*`;
// Add continue token if provided
Line 211 ⟶ 299:
statusText.textContent = `Fetching subcategories for: ${categoryTitle}...`;
//const Adddata a= smallawait delay to avoid hammering the servermakeApiRequest(apiUrl);
await new Promise(resolve => setTimeout(resolve, 300));
const response = await fetch(apiUrl);
const data = await response.json();
if (!data.query || !data.query.categorymembers) {
Line 222 ⟶ 306:
}
// Extract subcategories and continue token, prefix with "Category:"
const subcategories = data.query.categorymembers.map(member => member.title.replace('); // Keep full "Category:'," ''));prefix
const nextContinueToken = data.continue ? data.continue.cmcontinue : null;
Line 229 ⟶ 313:
} catch (error) {
console.error("API request error:", error);
statusText.textContentinnerHTML += `<br>Error fetching subcategories: ${error.message}`;
return { subcategories: [], continueToken: null };
}
Line 238 ⟶ 322:
try {
// Base API URL for non-category members (exclude namespace 14, which is Category)
// Add maxlag parameter to be respectful of server load
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=0|1|2|3|4|5|6|7|8|9|10|11|12|13|15&cmlimit=500&format=json&origin=*`;
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=0|1|2|3|4|5|6|7|8|9|10|11|12|13|15&cmlimit=max&maxlag=5&format=json&origin=*`;
// Add continue token if provided
Line 247 ⟶ 332:
statusText.textContent = `Fetching items for: ${categoryTitle}...`;
//const Adddata a= smallawait delay to avoid hammering the servermakeApiRequest(apiUrl);
await new Promise(resolve => setTimeout(resolve, 300));
const response = await fetch(apiUrl);
const data = await response.json();
if (!data.query || !data.query.categorymembers) {
Line 258 ⟶ 339:
}
// Extract members and continue token
const members = data.query.categorymembers.map(member => member.title);
const nextContinueToken = data.continue ? data.continue.cmcontinue : null;
Line 265 ⟶ 346:
} catch (error) {
console.error("API request error:", error);
statusText.textContentinnerHTML += `<br>Error fetching items: ${error.message}`;
return { members: [], continueToken: null };
}
Line 284 ⟶ 365:
statusText.innerHTML = `Retrieved ${allMembers.length} items from "${categoryTitle}" (page ${pagesProcessed})...`;
// Add a longer pause between requests to be gentler on the API
if (continueToken) {
statusText.innerHTML += ` Pausing before next request...`;
await new Promise(resolve => setTimeout(resolve, 1000));
}
} while (continueToken);
Line 306 ⟶ 382:
pagesProcessed++;
// Add a longer pause between requests
if (continueToken) {
statusText.innerHTML += ` Pausing before next subcategory request...`;
await new Promise(resolve => setTimeout(resolve, 1000));
}
} while (continueToken);
Line 316 ⟶ 387:
}
/**
 * Collect every subcategory reachable from a category, walking the category
 * tree breadth-first with an explicit queue instead of recursion.
 *
 * A category is expanded at most once: it is skipped when it is already in the
 * local visited set or in the shared globalVisited set, which also guards
 * against circular category references.
 *
 * @param {string} categoryTitle - Root category name WITHOUT the "Category:" prefix.
 * @returns {Promise<string[]>} Subcategory titles as returned by
 *   getAllSubcategories(), in discovery order. A subcategory listed under
 *   several parents can appear more than once; callers deduplicate.
 */
async function getAllSubcategoriesRecursive(categoryTitle) {
    const visited = new Set();
    const allSubcategories = [];
    const queue = [`Category:${categoryTitle}`]; // Start with prefixed category

    while (queue.length > 0) {
        const currentCategory = queue.shift();

        // Skip if already visited (circular reference detection)
        if (visited.has(currentCategory) || globalVisited.has(currentCategory)) {
            continue;
        }
        visited.add(currentCategory);
        globalVisited.add(currentCategory);

        statusText.innerHTML = `Exploring subcategories (found ${allSubcategories.length} categories, queue: ${queue.length})...`;

        // Get direct subcategories (remove "Category:" prefix for API call)
        const categoryNameForApi = currentCategory.replace('Category:', '');
        const directSubcategories = await getAllSubcategories(categoryNameForApi);

        // Add new subcategories to results and queue
        for (const subcategory of directSubcategories) {
            if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
                allSubcategories.push(subcategory);
                queue.push(subcategory);
            }
        }
    }

    return allSubcategories;
}
/**
 * Collect the items of a category and of every subcategory below it, walking
 * the category tree breadth-first with an explicit queue.
 *
 * A category is processed at most once: it is skipped when its
 * "Category:"-prefixed title is already in the local visited set or in the
 * shared globalVisited set, which also guards against circular references.
 *
 * @param {string} categoryTitle - Root category name WITHOUT the "Category:" prefix.
 * @returns {Promise<{items: string[], totalCategories: number}>} All collected
 *   items (not deduplicated) and the number of categories processed.
 */
async function getAllItemsRecursive(categoryTitle) {
    const visited = new Set();
    const allItems = [];
    const queue = [categoryTitle]; // Start without prefix for consistency
    let totalCategories = 0;

    while (queue.length > 0) {
        const currentCategory = queue.shift();
        const categoryKey = `Category:${currentCategory}`;

        // Skip if already visited (circular reference detection)
        if (visited.has(categoryKey) || globalVisited.has(categoryKey)) {
            continue;
        }
        visited.add(categoryKey);
        globalVisited.add(categoryKey);
        totalCategories++;

        statusText.innerHTML = `Getting items from "${currentCategory}" (processed ${totalCategories} categories, found ${allItems.length} items, queue: ${queue.length})...`;

        // Get items from current category. They are appended one by one because
        // push(...array) passes every element as a call argument and throws a
        // RangeError once a category is large enough to exceed the engine's
        // argument limit.
        const currentItems = await getAllCategoryMembers(currentCategory);
        for (const item of currentItems) {
            allItems.push(item);
        }

        // Get direct subcategories and add to queue
        const directSubcategories = await getAllSubcategories(currentCategory);
        for (const subcategory of directSubcategories) {
            if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
                // Remove "Category:" prefix for queue consistency
                const subcategoryName = subcategory.replace('Category:', '');
                queue.push(subcategoryName);
            }
        }
    }

    return { items: allItems, totalCategories };
}
const directSubcategories = await getAllSubcategories(categoryTitle);
let allSubcategories = [...directSubcategories];
/**
 * Fetch the items and the direct subcategories of a single category
 * (non-recursive).
 *
 * The two lookups run one after the other rather than in parallel, so the
 * script never has more than one API request in flight.
 *
 * @param {string} categoryTitle - Category name without the "Category:" prefix.
 * @returns {Promise<{items: string[], subcategories: string[]}>} The
 *   category's items and its direct subcategories.
 */
async function getBothItemsAndSubcategories(categoryTitle) {
    statusText.innerHTML = 'Gathering items and subcategories from this category...';

    const items = await getAllCategoryMembers(categoryTitle);
    const subcategories = await getAllSubcategories(categoryTitle);

    return { items, subcategories };
}
allSubcategories = allSubcategories.concat(nestedSubcategories);
/**
 * Collect both the items and the subcategories of a category and of every
 * subcategory below it, walking the category tree breadth-first.
 *
 * A category is processed at most once: it is skipped when its
 * "Category:"-prefixed title is already in the local visited set or in the
 * shared globalVisited set, which also guards against circular references.
 *
 * @param {string} categoryTitle - Root category name WITHOUT the "Category:" prefix.
 * @returns {Promise<{items: string[], subcategories: string[], totalCategories: number}>}
 *   Collected items, discovered subcategory titles (a subcategory listed under
 *   several parents can appear more than once) and the number of categories
 *   processed. Neither list is deduplicated.
 */
async function getBothItemsAndSubcategoriesRecursive(categoryTitle) {
    const visited = new Set();
    const allItems = [];
    const allSubcategories = [];
    const queue = [categoryTitle]; // Start without prefix for consistency
    let totalCategories = 0;

    while (queue.length > 0) {
        const currentCategory = queue.shift();
        const categoryKey = `Category:${currentCategory}`;

        // Skip if already visited (circular reference detection)
        if (visited.has(categoryKey) || globalVisited.has(categoryKey)) {
            continue;
        }
        visited.add(categoryKey);
        globalVisited.add(categoryKey);
        totalCategories++;

        statusText.innerHTML = `Getting items and subcategories from "${currentCategory}" (processed ${totalCategories} categories, found ${allItems.length} items, ${allSubcategories.length} subcategories, queue: ${queue.length})...`;

        // Get items from current category. They are appended one by one because
        // push(...array) passes every element as a call argument and throws a
        // RangeError once a category is large enough to exceed the engine's
        // argument limit.
        const currentItems = await getAllCategoryMembers(currentCategory);
        for (const item of currentItems) {
            allItems.push(item);
        }

        // Get direct subcategories
        const directSubcategories = await getAllSubcategories(currentCategory);

        // Add subcategories to results and queue
        for (const subcategory of directSubcategories) {
            if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
                allSubcategories.push(subcategory);
                // Remove "Category:" prefix for queue consistency
                const subcategoryName = subcategory.replace('Category:', '');
                queue.push(subcategoryName);
            }
        }
    }

    return { items: allItems, subcategories: allSubcategories, totalCategories };
}
Line 354 ⟶ 528:
}
const copySuccessincludeUrls = await copyToClipboardOrDownload(itemsurlCheckbox.join('\n'), categoryName)checked;
const formattedText = formatItems(items, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName);
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${items.length} items to clipboard.`;
Line 366 ⟶ 543:
// Handle "Copy All Items" button click
copyAllItemsBtn.addEventListener('click', async () => {
statusText.innerHTML = 'Gathering items from this category and all subcategories recursively via API (this may take a while)...';
try {
// GetClear itemsglobal fromvisited theset currentfor categorythis operation
let allItems = await getAllCategoryMembersglobalVisited.clear(categoryName);
statusText.innerHTML = `Found ${allItems.length} items in main category. Checking subcategories...`;
// Get all subcategoriesitems recursively
const subcategories{ items: allItems, totalCategories } = await getAllSubcategoriesgetAllItemsRecursive(categoryName);
statusText.innerHTML = `Found ${subcategories.length} subcategories. Processing...`;
// Set to track processed categories
const processedCategories = new Set(); // To avoid processing the same category twice
// Process each subcategory
for (let i = 0; i < subcategories.length; i++) {
const subcategoryTitle = subcategories[i];
if (!processedCategories.has(subcategoryTitle)) {
processedCategories.add(subcategoryTitle);
const subcategoryItems = await getAllCategoryMembers(subcategoryTitle);
allItems = allItems.concat(subcategoryItems);
statusText.innerHTML = `Processed ${i + 1}/${subcategories.length} subcategories. Found ${allItems.length} items so far...`;
}
}
// Deduplicate items
Line 402 ⟶ 560:
}
const copySuccessincludeUrls = await copyToClipboardOrDownload(uniqueItemsurlCheckbox.join('\n'), categoryName + '_all')checked;
const formattedText = formatItems(uniqueItems, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_all_recursive');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${uniqueItems.length} unique items to clipboard from ${totalCategories} categories.`;
}
} catch (error) {
statusText.innerHTML = `Error: ${error.message}`;
console.error('Error:', error);
}
});
 
// Handle "Copy Subcats from this Category" button click
copyDirectSubcatsBtn.addEventListener('click', async () => {
statusText.innerHTML = 'Gathering direct subcategories from this category via API...';
try {
const subcategories = await getAllSubcategories(categoryName);
if (subcategories.length === 0) {
statusText.innerHTML = 'No direct subcategories found in this category.';
return;
}
const includeUrls = urlCheckbox.checked;
const formattedText = formatItems(subcategories, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_direct_subcats');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${subcategories.length} direct subcategories to clipboard.`;
}
} catch (error) {
Line 417 ⟶ 603:
try {
const// processedCategoriesClear =global newvisited Set();set for this operation
globalVisited.clear();
const allSubcategories = await getAllSubcategoriesRecursive(categoryName, processedCategories);
const allSubcategories = await getAllSubcategoriesRecursive(categoryName);
// Deduplicate subcategories
Line 428 ⟶ 616:
}
const includeUrls = urlCheckbox.checked;
const copySuccess = await copyToClipboardOrDownload(uniqueSubcategories.join('\n'), categoryName + '_subcategories');
const formattedText = formatItems(uniqueSubcategories, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_subcategories');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${uniqueSubcategories.length} unique subcategories to clipboard.`;
Line 438 ⟶ 629:
});
 
// Handle "Copy Both" button click
console.log('Wikipedia Category Copier script (API version) has been loaded successfully!');
// Click handler for the "Copy both" button: exports this category's own items
// followed by its direct subcategories (no recursion), honouring the
// "Whole URLs" checkbox, and reports the outcome in the status line.
copyBothBtn.addEventListener('click', async () => {
    statusText.innerHTML = 'Gathering both items and subcategories from this category via API...';

    try {
        const found = await getBothItemsAndSubcategories(categoryName);
        const { items: pageTitles, subcategories: subcatTitles } = found;

        // Nothing to export: say so and stop before touching the clipboard.
        if (pageTitles.length === 0 && subcatTitles.length === 0) {
            statusText.innerHTML = 'No items or subcategories found in this category.';
            return;
        }

        // Items first, subcategories after them.
        const exportList = [...pageTitles, ...subcatTitles];
        const exportText = formatItems(exportList, urlCheckbox.checked);

        const copied = await copyToClipboardOrDownload(exportText, categoryName + '_both');
        if (copied) {
            statusText.innerHTML = `Successfully copied ${pageTitles.length} items and ${subcatTitles.length} subcategories (${exportList.length} total) to clipboard.`;
        }
    } catch (error) {
        statusText.innerHTML = `Error: ${error.message}`;
        console.error('Error:', error);
    }
});
 
// Click handler for the "Copy both recursively" button: walks the whole
// category tree, then exports the deduplicated items followed by the
// deduplicated subcategories, honouring the "Whole URLs" checkbox.
copyBothRecursiveBtn.addEventListener('click', async () => {
    statusText.innerHTML = 'Gathering both items and subcategories recursively via API (this may take a while)...';

    try {
        // Start from a clean slate so categories seen in an earlier run are walked again.
        globalVisited.clear();

        const traversal = await getBothItemsAndSubcategoriesRecursive(categoryName);
        const { totalCategories } = traversal;

        // A page or subcategory can be reached through several parents; keep one copy of each.
        const uniqueItems = Array.from(new Set(traversal.items));
        const uniqueSubcategories = Array.from(new Set(traversal.subcategories));

        if (uniqueItems.length === 0 && uniqueSubcategories.length === 0) {
            statusText.innerHTML = 'No items or subcategories found in this category or its subcategories.';
            return;
        }

        // Items first, subcategories after them.
        const exportList = [...uniqueItems, ...uniqueSubcategories];
        const exportText = formatItems(exportList, urlCheckbox.checked);

        const copied = await copyToClipboardOrDownload(exportText, categoryName + '_both_recursive');
        if (copied) {
            statusText.innerHTML = `Successfully copied ${uniqueItems.length} unique items and ${uniqueSubcategories.length} unique subcategories (${exportList.length} total) to clipboard from ${totalCategories} categories.`;
        }
    } catch (error) {
        statusText.innerHTML = `Error: ${error.message}`;
        console.error('Error:', error);
    }
});
 
console.log('Wikipedia Category Copier script has been loaded successfully!');
} else {
// Do nothing on non-category pages
console.log('Wikipedia Category Copier: Not a category page, script inactive.');
}