Content deleted Content added
Polygnotus (talk | contribs) No edit summary |
Polygnotus (talk | contribs) add "both" and "both recursive" buttons, re-order |
||
(16 intermediate revisions by the same user not shown) | |||
Line 1:
// Wikipedia Category Items Copier - Fixed Redirect Filtering
const API_DELAY = 500; // Delay between API requests in milliseconds
const MAX_RETRIES = 3; // Maximum number of retries for failed requests
// Only run on Wikipedia category pages
Line 18 ⟶ 17:
container.style.border = '1px solid #a2a9b1';
container.style.borderRadius = '3px';
/**
 * Gives an element a native hover tooltip.
 *
 * @param {HTMLElement} element - Element that receives the tooltip.
 * @param {string} text - Tooltip text, stored in the element's `title`.
 * @returns {void}
 */
function addTooltip(element, text) {
  // The title attribute is what the browser renders as the tooltip.
  Object.assign(element, { title: text });
  Object.assign(element.style, { position: 'relative' });
}
// Create the "Copy Items" button
const copyItemsBtn = document.createElement('button');
copyItemsBtn.textContent = 'Copy
copyItemsBtn.style.marginRight = '10px';
copyItemsBtn.style.padding = '8px 12px';
copyItemsBtn.style.cursor = 'pointer';
addTooltip(copyItemsBtn, 'Copy all items in this category. Not recursive.');
// Create the "Copy All Items" button
const copyAllItemsBtn = document.createElement('button');
copyAllItemsBtn.textContent = 'Copy
copyAllItemsBtn.style.marginRight = '10px';
copyAllItemsBtn.style.padding = '8px 12px';
copyAllItemsBtn.style.cursor = 'pointer';
addTooltip(copyAllItemsBtn, 'Copy all items in this category AND all items in its subcategories.');
// Create the "Copy Subcats from this Category" button
const copyDirectSubcatsBtn = document.createElement('button');
copyDirectSubcatsBtn.textContent = 'Copy subcats';
copyDirectSubcatsBtn.style.marginRight = '10px';
copyDirectSubcatsBtn.style.padding = '8px 12px';
copyDirectSubcatsBtn.style.cursor = 'pointer';
addTooltip(copyDirectSubcatsBtn, 'Copy all subcategories of this category. Not recursive.');
// Create the "Copy Subcategories" button
const copySubcatsBtn = document.createElement('button');
copySubcatsBtn.textContent = 'Copy
copySubcatsBtn.style.marginRight = '10px';
copySubcatsBtn.style.padding = '8px 12px';
copySubcatsBtn.style.cursor = 'pointer';
addTooltip(copySubcatsBtn, 'Copy all subcategories of this category and its subcategories.');
// "Copy both" button: items plus subcategories of this category only
const copyBothBtn = document.createElement('button');
copyBothBtn.textContent = 'Copy both';
Object.assign(copyBothBtn.style, {
  marginRight: '10px',
  padding: '8px 12px',
  cursor: 'pointer',
});
addTooltip(copyBothBtn, 'Copy all items and subcategories from this category. Not recursive.');
// "Copy both recursively" button: items plus subcategories, descending into every subcategory
const copyBothRecursiveBtn = document.createElement('button');
copyBothRecursiveBtn.textContent = 'Copy both recursively';
Object.assign(copyBothRecursiveBtn.style, {
  marginRight: '10px',
  padding: '8px 12px',
  cursor: 'pointer',
});
addTooltip(copyBothRecursiveBtn, 'Copy all items and subcategories from this category and all its subcategories.');
// Checkbox (and its label) that switches the export from titles to full URLs
const urlCheckbox = Object.assign(document.createElement('input'), {
  type: 'checkbox',
  id: 'includeUrls',
});
urlCheckbox.style.marginLeft = '15px';
// The label is tied to the checkbox through the shared 'includeUrls' id
const urlLabel = Object.assign(document.createElement('label'), {
  htmlFor: 'includeUrls',
  textContent: 'Whole URLs',
});
urlLabel.style.marginLeft = '5px';
addTooltip(urlLabel, 'Include full Wikipedia URLs for each item in the export');
// Create status text
Line 44 ⟶ 91:
statusText.style.color = '#555';
// Populate the container; array order is the on-screen order of the controls
const containerChildren = [
  copyItemsBtn,
  copyAllItemsBtn,
  copyDirectSubcatsBtn,
  copySubcatsBtn,
  copyBothBtn,
  copyBothRecursiveBtn,
  urlCheckbox,
  urlLabel,
  statusText,
];
for (const child of containerChildren) {
  container.appendChild(child);
}
Line 56 ⟶ 108:
} else {
document.querySelector('#content').prepend(container);
}
// Global visited set to prevent visiting any page more than once across all operations.
// The recursive walkers add "Category:<name>" keys to it and skip any key already present;
// click handlers that start a recursive run call globalVisited.clear() first.
const globalVisited = new Set();
/**
 * Turns a list of page titles into the newline-separated text that gets exported.
 *
 * @param {string[]} items - Page titles.
 * @param {boolean} includeUrls - When truthy, each title is replaced by its
 *   full en.wikipedia.org URL (the title itself is not kept in the output).
 * @returns {string} One entry per line; an empty string for an empty list.
 */
function formatItems(items, includeUrls) {
  // Spaces become underscores before percent-encoding the title.
  const toUrl = (title) => {
    const slug = title.replace(/ /g, '_');
    return `https://en.wikipedia.org/wiki/${encodeURIComponent(slug)}`;
  };

  const lines = includeUrls ? items.map((title) => toUrl(title)) : items;
  return lines.join('\n');
}
Line 88 ⟶ 156:
// If clipboard fails, offer download instead
const filename = `${categoryName.replace(/[^a-z0-9]/gi, '_')}-items.txt`;
// Clear the status text completely and show only the clipboard failure message
statusText.innerHTML = `<p>Clipboard access failed. Click the link below to download items:</p>`;
offerTextAsDownload(text, filename);
resolve(false);
}
Line 162 ⟶ 231:
}
// Enhanced API request function with retry logic, rate limiting, and maxlag handling
async function
try {
// Handle rate limiting
if (response.status === 429 || response.status >= 500) {
if (retryCount < MAX_RETRIES) {
const waitTime = Math.pow(2, retryCount) * 1000; // Exponential backoff
statusText.innerHTML += `<br>Rate limited or server error, waiting ${waitTime/1000}s before retry ${retryCount + 1}/${MAX_RETRIES}...`;
await new Promise(resolve => setTimeout(resolve, waitTime));
return makeApiRequest(url, retryCount + 1);
} else {
throw new Error(`Request failed after ${MAX_RETRIES} retries: ${response.status}`);
}
}
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const data = await response.json();
// Handle maxlag errors - these don't count as retries since they're not real failures
if (data.error &&
const lagTime = data.error.lag || 5; // Default to 5 seconds if lag not specified
const waitTime = (lagTime + 2) * 1000; // Add 2 second buffer
statusText.innerHTML += `<br>Database lagged (${lagTime}s), waiting ${waitTime/1000}s before retry...`;
await new Promise(resolve => setTimeout(resolve, waitTime));
return makeApiRequest(url, retryCount); // Don't increment retry count for maxlag
}
//
throw new Error(`API Error: ${data.error.code} - ${data.error.info}`);
}
return data;
} catch (error) {
statusText.
await new Promise(resolve => setTimeout(resolve, 1000));
return makeApiRequest(url, retryCount + 1);
} else {
throw error;
}
}
}
// Function to get all subcategories of a category
async function getSubcategories(categoryTitle, continueToken = null) {
try {
// Base API URL for subcategories (only get items with namespace 14, which is Category)
// Add maxlag parameter to be respectful of server load
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=14&cmlimit=max&maxlag=5&format=json&origin=*`;
// Add continue token if provided
Line 211 ⟶ 299:
statusText.textContent = `Fetching subcategories for: ${categoryTitle}...`;
if (!data.query || !data.query.categorymembers) {
Line 222 ⟶ 306:
}
// Extract subcategories and continue token, prefix with "Category:"
const subcategories = data.query.categorymembers.map(member => member.title
const nextContinueToken = data.continue ? data.continue.cmcontinue : null;
Line 229 ⟶ 313:
} catch (error) {
console.error("API request error:", error);
statusText.
return { subcategories: [], continueToken: null };
}
Line 238 ⟶ 322:
try {
// Base API URL for non-category members (exclude namespace 14, which is Category)
// Add maxlag parameter to be respectful of server load
let apiUrl = `https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:${encodeURIComponent(categoryTitle)}&cmnamespace=0|1|2|3|4|5|6|7|8|9|10|11|12|13|15&cmlimit=max&maxlag=5&format=json&origin=*`;
// Add continue token if provided
Line 247 ⟶ 332:
statusText.textContent = `Fetching items for: ${categoryTitle}...`;
if (!data.query || !data.query.categorymembers) {
Line 258 ⟶ 339:
}
// Extract members
const members = data.query.categorymembers.map(member => member.title);
const nextContinueToken = data.continue ? data.continue.cmcontinue : null;
Line 265 ⟶ 346:
} catch (error) {
console.error("API request error:", error);
statusText.
return { members: [], continueToken: null };
}
Line 284 ⟶ 365:
statusText.innerHTML = `Retrieved ${allMembers.length} items from "${categoryTitle}" (page ${pagesProcessed})...`;
} while (continueToken);
Line 306 ⟶ 382:
pagesProcessed++;
} while (continueToken);
Line 316 ⟶ 387:
}
/**
 * Walks the subcategory tree breadth-first and collects every subcategory
 * reachable from the given category, skipping categories already visited so
 * that circular category references cannot loop forever.
 *
 * @param {string} categoryTitle - Category name without the "Category:" prefix.
 * @param {Set<string>} [visited] - Prefixed category titles already explored in
 *   this walk; defaults to a fresh set so callers can pass only the title.
 * @returns {Promise<string[]>} Subcategory titles as returned by
 *   getAllSubcategories (expected to carry the "Category:" prefix). A category
 *   reachable through several parents can appear more than once; the caller
 *   deduplicates.
 */
async function getAllSubcategoriesRecursive(categoryTitle, visited = new Set()) {
  const allSubcategories = [];
  const queue = [`Category:${categoryTitle}`]; // Start with prefixed category

  while (queue.length > 0) {
    const currentCategory = queue.shift();

    // Skip if already visited (circular reference detection)
    if (visited.has(currentCategory) || globalVisited.has(currentCategory)) {
      continue;
    }
    visited.add(currentCategory);
    globalVisited.add(currentCategory);

    statusText.innerHTML = `Exploring subcategories (found ${allSubcategories.length} categories, queue: ${queue.length})...`;

    // Get direct subcategories (remove "Category:" prefix for API call)
    const categoryNameForApi = currentCategory.replace('Category:', '');
    const directSubcategories = await getAllSubcategories(categoryNameForApi);

    // Add new subcategories to results and queue
    for (const subcategory of directSubcategories) {
      if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
        allSubcategories.push(subcategory);
        queue.push(subcategory);
      }
    }
  }

  return allSubcategories;
}
/**
 * Collects the items of a category and of every category below it, walking the
 * subcategory tree breadth-first. Categories already recorded in the local or
 * global visited set are skipped, which also guards against circular
 * category references.
 *
 * @param {string} categoryTitle - Category name without the "Category:" prefix.
 * @returns {Promise<{items: string[], totalCategories: number}>} All item
 *   titles gathered (may contain duplicates when a page sits in several
 *   categories) and the number of categories that were processed.
 */
async function getAllItemsRecursive(categoryTitle) {
  const visited = new Set();
  const allItems = [];
  const queue = [categoryTitle]; // Start without prefix for consistency
  let totalCategories = 0;

  while (queue.length > 0) {
    const currentCategory = queue.shift();
    const categoryKey = `Category:${currentCategory}`;

    // Skip if already visited (circular reference detection)
    if (visited.has(categoryKey) || globalVisited.has(categoryKey)) {
      continue;
    }
    visited.add(categoryKey);
    globalVisited.add(categoryKey);
    totalCategories++;

    statusText.innerHTML = `Getting items from "${currentCategory}" (processed ${totalCategories} categories, found ${allItems.length} items, queue: ${queue.length})...`;

    // Get items from current category
    const currentItems = await getAllCategoryMembers(currentCategory);
    allItems.push(...currentItems);

    // Get direct subcategories and add to queue
    const directSubcategories = await getAllSubcategories(currentCategory);
    for (const subcategory of directSubcategories) {
      if (!visited.has(subcategory) && !globalVisited.has(subcategory)) {
        // Remove "Category:" prefix for queue consistency
        const subcategoryName = subcategory.replace('Category:', '');
        queue.push(subcategoryName);
      }
    }
  }

  // Hand the results back; without this the function resolved to undefined.
  return { items: allItems, totalCategories };
}
/**
 * Fetches the items and the direct subcategories of a single category
 * (no recursion into subcategories).
 *
 * @param {string} categoryTitle - Category name without the "Category:" prefix.
 * @returns {Promise<{items: string[], subcategories: string[]}>} The
 *   category's items and its direct subcategories.
 */
async function getBothItemsAndSubcategories(categoryTitle) {
  statusText.innerHTML = 'Gathering items and subcategories from this category...';

  // Requests run one after the other rather than in parallel.
  const items = await getAllCategoryMembers(categoryTitle);
  // Previously `subcategories` was returned without ever being fetched.
  const subcategories = await getAllSubcategories(categoryTitle);

  return { items, subcategories };
}
/**
 * Breadth-first walk over a category tree that gathers, in a single pass,
 * both the items of every category and the subcategories found on the way.
 *
 * @param {string} categoryTitle - Category name without the "Category:" prefix.
 * @returns {Promise<{items: string[], subcategories: string[], totalCategories: number}>}
 *   Everything collected (either list may contain duplicates) and the number
 *   of categories that were processed.
 */
async function getBothItemsAndSubcategoriesRecursive(categoryTitle) {
  const seen = new Set();
  const collectedItems = [];
  const collectedSubcats = [];
  const pending = [categoryTitle]; // names are queued without the "Category:" prefix
  let processedCount = 0;

  // A category counts as handled if this walk or an earlier one already recorded it.
  const alreadyHandled = (key) => seen.has(key) || globalVisited.has(key);

  while (pending.length !== 0) {
    const name = pending.shift();
    const key = `Category:${name}`;

    // Guard against circular references and repeated queue entries.
    if (alreadyHandled(key)) {
      continue;
    }
    seen.add(key);
    globalVisited.add(key);
    processedCount += 1;

    statusText.innerHTML = `Getting items and subcategories from "${name}" (processed ${processedCount} categories, found ${collectedItems.length} items, ${collectedSubcats.length} subcategories, queue: ${pending.length})...`;

    // Items first, then the direct subcategories of the same category.
    const pageTitles = await getAllCategoryMembers(name);
    collectedItems.push(...pageTitles);

    const childCategories = await getAllSubcategories(name);
    for (const child of childCategories) {
      if (alreadyHandled(child)) {
        continue;
      }
      collectedSubcats.push(child);
      // Strip the prefix so queue entries match the form of the start value.
      pending.push(child.replace('Category:', ''));
    }
  }

  return {
    items: collectedItems,
    subcategories: collectedSubcats,
    totalCategories: processedCount,
  };
}
Line 354 ⟶ 528:
}
const
const formattedText = formatItems(items, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName);
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${items.length} items to clipboard.`;
Line 366 ⟶ 543:
// Handle "Copy All Items" button click
copyAllItemsBtn.addEventListener('click', async () => {
statusText.innerHTML = 'Gathering items from this category and all subcategories recursively via API (this may take a while)...';
try {
//
// Get all
const
// Deduplicate items
Line 402 ⟶ 560:
}
const
const formattedText = formatItems(uniqueItems, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_all_recursive');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${uniqueItems.length} unique items to clipboard from ${totalCategories} categories.`;
}
} catch (error) {
statusText.innerHTML = `Error: ${error.message}`;
console.error('Error:', error);
}
});
// Handle "Copy Subcats from this Category" button click
copyDirectSubcatsBtn.addEventListener('click', async () => {
statusText.innerHTML = 'Gathering direct subcategories from this category via API...';
try {
const subcategories = await getAllSubcategories(categoryName);
if (subcategories.length === 0) {
statusText.innerHTML = 'No direct subcategories found in this category.';
return;
}
const includeUrls = urlCheckbox.checked;
const formattedText = formatItems(subcategories, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_direct_subcats');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${subcategories.length} direct subcategories to clipboard.`;
}
} catch (error) {
Line 417 ⟶ 603:
try {
globalVisited.clear();
const allSubcategories = await getAllSubcategoriesRecursive(categoryName);
// Deduplicate subcategories
Line 428 ⟶ 616:
}
const includeUrls = urlCheckbox.checked;
const formattedText = formatItems(uniqueSubcategories, includeUrls);
const copySuccess = await copyToClipboardOrDownload(formattedText, categoryName + '_subcategories');
if (copySuccess) {
statusText.innerHTML = `Successfully copied ${uniqueSubcategories.length} unique subcategories to clipboard.`;
Line 438 ⟶ 629:
});
// Handle "Copy Both" button click: export the items and the direct
// subcategories of the current category (no recursion).
copyBothBtn.addEventListener('click', async () => {
  statusText.innerHTML = 'Gathering both items and subcategories from this category via API...';
  try {
    const { items, subcategories } = await getBothItemsAndSubcategories(categoryName);
    if (items.length === 0 && subcategories.length === 0) {
      statusText.innerHTML = 'No items or subcategories found in this category.';
      return;
    }

    // Combine items and subcategories (items first)
    const combinedResults = [...items, ...subcategories];
    const includeUrls = urlCheckbox.checked;
    const formattedText = formatItems(combinedResults, includeUrls);
    const copySuccess = await copyToClipboardOrDownload(formattedText, `${categoryName}_both`);

    // The success text is written only when the helper resolves truthy.
    if (copySuccess) {
      statusText.innerHTML = `Successfully copied ${items.length} items and ${subcategories.length} subcategories (${combinedResults.length} total) to clipboard.`;
    }
  } catch (error) {
    // Use textContent so an error message is never parsed as HTML.
    statusText.textContent = `Error: ${error.message}`;
    console.error('Error:', error);
  }
});
// Handle "Copy Both Recursively" button click: export the items and
// subcategories of the current category and of everything beneath it.
copyBothRecursiveBtn.addEventListener('click', async () => {
  statusText.innerHTML = 'Gathering both items and subcategories recursively via API (this may take a while)...';
  try {
    // Clear global visited set for this operation so earlier runs do not
    // cause categories to be skipped.
    globalVisited.clear();
    const { items: allItems, subcategories: allSubcategories, totalCategories } = await getBothItemsAndSubcategoriesRecursive(categoryName);

    // Deduplicate items and subcategories
    const uniqueItems = [...new Set(allItems)];
    const uniqueSubcategories = [...new Set(allSubcategories)];
    if (uniqueItems.length === 0 && uniqueSubcategories.length === 0) {
      statusText.innerHTML = 'No items or subcategories found in this category or its subcategories.';
      return;
    }

    // Combine items and subcategories (items first)
    const combinedResults = [...uniqueItems, ...uniqueSubcategories];
    const includeUrls = urlCheckbox.checked;
    const formattedText = formatItems(combinedResults, includeUrls);
    const copySuccess = await copyToClipboardOrDownload(formattedText, `${categoryName}_both_recursive`);

    // The success text is written only when the helper resolves truthy.
    if (copySuccess) {
      statusText.innerHTML = `Successfully copied ${uniqueItems.length} unique items and ${uniqueSubcategories.length} unique subcategories (${combinedResults.length} total) to clipboard from ${totalCategories} categories.`;
    }
  } catch (error) {
    // Use textContent so an error message is never parsed as HTML.
    statusText.textContent = `Error: ${error.message}`;
    console.error('Error:', error);
  }
});
console.log('Wikipedia Category Copier script has been loaded successfully!');
} else {
console.log('Wikipedia Category Copier: Not a category page, script inactive.');
}
|