User:Dr pda/generatestats.js: Difference between revisions

Content deleted Content added
fixed typo, added option to output entire list of articles
Adding option to specify namespace for basic template transclusion query
 
(21 intermediate revisions by the same user not shown)
Line 48:
if (req.status == 200) {
// ...processing statements go here...
if(useTalkCategory || useTemplateCategory) jobsLeft--;
var response = req.responseXML.documentElement;
var pages = response.getElementsByTagName('page');
 
if(pages.length > 0){
Line 57 ⟶ 58:
}
document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';
 
//Check for more pages
var embeddedin = response.getElementsByTagName('embeddedin');
if(embeddedin.length > 0){
var geicontinue = embeddedin[0].getAttribute('geicontinue');
if(useTalkCategory || useTemplateCategory) jobsLeft++;
loadXMLDocPassingTemplate(queryURL+'&geicontinue='+geicontinue,getSizeFromAPI,template);
}
//If last page retrieved then start processing
else if(jobsLeft == 0){
//If using wiki text size
if(document.___location.href.indexOf('prosesize') == -1){
Line 73 ⟶ 76:
for(var x in pagesList){
var titleURL = encodeURIComponent(pagesList[x].key.replace(/ /g,'_'));
var myTimeOut = setTimeout("loadXMLDocPassingTemplate('/w/index.php?action=render&title='+titleURL,getProseSizeFromPage,pagesList[x].key)",1000);
}
}
Line 85 ⟶ 88:
}
/**
 * XMLHttpRequest callback for the talk-page category query.
 *
 * Collects the `subjectid` attribute of every <page> element in the response
 * into the shared `articleList`. While the response carries a
 * <categorymembers> continuation element, the next page of results is
 * requested with this same function as callback. Once there is no
 * continuation, the collected ids are sent to `queryURL` in batches, one
 * request per batch, each handled by `getSizeFromAPI`; `jobsLeft` is
 * incremented once per request issued.
 *
 * @param req      the XMLHttpRequest whose state changed
 * @param template value passed through unchanged to every follow-up request
 */
function getArticlePageFromTalkPage(req,template) {
	// only if req shows "loaded"
	if (req.readyState != 4) return;
	// only if "OK"
	if (req.status != 200) {
		alert("There was a problem retrieving the XML data:\n" +
			req.statusText);
		return;
	}

	var response = req.responseXML.documentElement;
	var pages = response.getElementsByTagName('page');
	if(pages.length == 0) return;

	for(var i=0;i<pages.length;i++){
		var subjectId = pages[i].getAttribute('subjectid');
		// getAttribute returns null when the response carries no subjectid
		// for a page; skip those so the literal "null" never ends up in the
		// pageids list.
		if(subjectId !== null && subjectId !== '') articleList.push(subjectId);
	}

	//Check for more pages
	var categorymembers = response.getElementsByTagName('categorymembers');
	if(categorymembers.length > 0){
		var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
		// The continuation token is arbitrary text (it can contain '|', '&'
		// or spaces), so it must be escaped before going into the query string.
		loadXMLDocPassingTemplate(talkQueryURL+'&gcmcontinue='+encodeURIComponent(gcmcontinue),getArticlePageFromTalkPage,template);
		return;
	}

	//All pages retrieved
	//API limited to 50 titles per query
	var batchSize = 50;
	// Index-based slicing instead of for...in: for...in yields string keys
	// and also visits anything other scripts have added to Array.prototype.
	for(var start=0;start<articleList.length;start+=batchSize){
		var pageIds = articleList.slice(start,start+batchSize).join('|');
		jobsLeft++;
		loadXMLDocPassingTemplate(queryURL+pageIds,getSizeFromAPI,template);
	}
}
/**
 * XMLHttpRequest callback for the template-category query.
 *
 * Appends the URI-encoded `title` attribute of every <page> element in the
 * response to the shared `articleList`. While the response carries a
 * <categorymembers> continuation element, the next page of results is
 * requested with this same function as callback. Once there is no
 * continuation, one request per collected title is issued against
 * `queryURL`, each handled by `getSizeFromAPI`; `jobsLeft` is incremented
 * once per request issued.
 *
 * @param req      the XMLHttpRequest whose state changed
 * @param template value passed through unchanged to every follow-up request
 */
function getPagesFromTemplateCategory(req,template) {
	// only if req shows "loaded"
	if (req.readyState != 4) return;
	// only if "OK"
	if (req.status != 200) {
		alert("There was a problem retrieving the XML data:\n" +
			req.statusText);
		return;
	}

	var response = req.responseXML.documentElement;
	var pages = response.getElementsByTagName('page');
	if(pages.length == 0) return;

	for(var i=0;i<pages.length;i++){
		articleList.push(encodeURIComponent(pages[i].getAttribute('title')));
	}

	//Check for more pages
	var categorymembers = response.getElementsByTagName('categorymembers');
	if(categorymembers.length > 0){
		var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
		// The continuation token is arbitrary text (it can contain '|', '&'
		// or spaces), so it must be escaped before going into the query string.
		loadXMLDocPassingTemplate(templateQueryURL+'&gcmcontinue='+encodeURIComponent(gcmcontinue),getPagesFromTemplateCategory,template);
		return;
	}

	//All pages retrieved
	//API embeddedin query can only take one title
	// Indexed loop instead of for...in, which would also visit anything
	// other scripts have added to Array.prototype.
	for(var j=0;j<articleList.length;j++){
		jobsLeft++;
		loadXMLDocPassingTemplate(queryURL+articleList[j],getSizeFromAPI,template);
	}
}
function getProseSizeFromPage(req,title) {
Line 108 ⟶ 190:
}
proseList[proseIndex++] = new keyValuePair(title,proseSize);
document.getElementById('wpTextbox1').value = 'Retrieved prose size for ' + proseIndex + ' out of ' + index + ' articles.\n To abort click the back button in your browser.';
//If last page retrieved then start processing
if(proseIndex == index){
Line 128 ⟶ 210:
var bottomTen = '===Ten shortest articles===\n';
for(var i=0;i<10;i++){
bottomTen += ('# [[' + pagesList[i].key + ']] (' + Math.round(pagesList[i].value/10001024) + ' kB)\n');
}
Line 134 ⟶ 216:
var topTen = '===Ten longest articles===\n';
for(var i=0;i<10;i++){
topTen += ('# [[' + pagesList[i].key + ']] (' + Math.round(pagesList[i].value/10001024) + ' kB)\n');
}
 
Line 140 ⟶ 222:
if(document.___location.href.indexOf('&list') != -1){
for(var i=0;i<pagesList.length;i++){
list += ('# [[' + pagesList[i].key + ']] (' + Math.round(pagesList[i].value/10001024) + ' kB)\n');
}
}
//Get Range
var max = Math.ceil(pagesList[0].value/10001024);
var min = Math.floor(pagesList[pagesList.length-1].value/10001024);
var xScale = getBestScale(min,max);
max = Math.ceil(max/xScale)*xScale;
Line 160 ⟶ 242:
for(var i=0;i<pagesList.length;i++){
sum += pagesList[i].value*1.0;
bins[Math.floor((pagesList[i].value/10001024-min)/(xScale*1.0))]++;
}
var mean = Math.round(sum/(pagesList.length*1024)).toFixed(3)/1000;
var median = (pagesList[Math.floor(pagesList.length/2)+1].value/10001024).toFixed(3);
var statistics = '===Statistics===\n*Number of articles: '+pagesList.length+'\n*Mean: '+mean+' kB\n*Median: '+median+' kB\n';
Line 176 ⟶ 258:
if(Math.floor(yScale/2) == yScale/2) verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale/2 + ' start:0'
//Draw chart
var chart = '===Chart===\n<timeline>\nColors=\n id:lightgrey value:gray(0.8)\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:300303 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax +'\nTimeAxis = orientation:vertical\nAlignBars = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
for(var i=0;i<numBins;i++){
chart += ' bar:'+(min+i*xScale)+' from:0 till:'+bins[i]+'\n';
Line 184 ⟶ 266:
 
if(document.___location.href.indexOf('&list') != -1){
document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart + '\n' + list;
}
else{
document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart + '\n' + list;
}
document.getElementById('wpPreview').click();
Line 195 ⟶ 277:
/**
 * Entry point: resets the shared state, asks the user what to analyse and
 * starts the first API request.
 *
 * The mode is chosen by substrings of the current page URL:
 *  - 'usetalkcategory'     : prompt for a talk page category; its members are
 *                            fetched and handled by getArticlePageFromTalkPage.
 *  - 'usetemplatecategory' : prompt for a template category; its members are
 *                            fetched and handled by getPagesFromTemplateCategory.
 *  - otherwise             : prompt for a single template; the pages embedding
 *                            it are fetched and handled by getSizeFromAPI.
 *  - 'specifynamespace'    : additionally prompt for the namespace number used
 *                            by the single-template query (default '0').
 *
 * Cancelling a prompt, or submitting an empty name, aborts without sending
 * any request.
 */
function generateStatistics(){
	// Asks for a page name and returns it with its first letter upper-cased
	// and the given namespace prefix prepended; returns null when the user
	// cancels the prompt or submits nothing.
	function promptForTitle(message,prefix){
		var name = prompt(message,"");
		if(name === null || name === '') return null;
		return prefix+name.toUpperCase().substr(0,1)+name.substr(1);
	}

	// Reset all shared state BEFORE any request is issued, so no callback can
	// run against counters or lists left over from an earlier run.
	pagesList = new Array();
	index = 0;
	proseList = new Array();
	proseIndex = 0;
	articleList = new Array();
	template ='';
	queryURL ='';
	talkQueryURL ='';
	templateQueryURL ='';
	jobsLeft = 0;
	namespace = '0';

	// The current URL is read from document.___location; `document.___location` is
	// not a DOM property, so reading .href from it throws.
	var href = document.location.href;
	useTalkCategory = href.indexOf('usetalkcategory') != -1;
	useTemplateCategory = href.indexOf('usetemplatecategory') != -1;
	specifyNamespace = href.indexOf('specifynamespace') != -1;

	if(specifyNamespace){
		var enteredNamespace = prompt("Enter the number of the namespace the pages are in\n (0=article, 2=User, 4=Wikipedia etc)","");
		if(enteredNamespace === null) return;
		// Blank input keeps the default namespace '0'.
		enteredNamespace = enteredNamespace.replace(/\s+/g,'');
		if(enteredNamespace !== '') namespace = enteredNamespace;
	}

	// User-entered text is escaped with encodeURIComponent before it goes
	// into a query string, so names containing '&', '+', '#' etc. survive.
	var title;
	if(useTalkCategory){
		title = promptForTitle("Enter the talk page category you want to check for\n (Don't include Category:)","Category:");
		if(title === null) return;
		template = title;
		talkQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + encodeURIComponent(template) + '&gcmlimit=500&gcmnamespace=1&prop=info&inprop=subjectid&format=xml';
		queryURL = '/w/api.php?action=query&prop=info&format=xml&pageids=';
		loadXMLDocPassingTemplate(talkQueryURL,getArticlePageFromTalkPage,template);
	}
	else if(useTemplateCategory){
		title = promptForTitle("Enter the template category you want to check\n (Don't include Category:)","Category:");
		if(title === null) return;
		template = title;
		templateQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + encodeURIComponent(template) + '&gcmlimit=500&gcmnamespace=10&prop=info&format=xml';
		queryURL = '/w/api.php?action=query&generator=embeddedin&geilimit=500&geinamespace=0&prop=info&format=xml&geititle=';
		loadXMLDocPassingTemplate(templateQueryURL,getPagesFromTemplateCategory,template);
	}
	else{
		title = promptForTitle("Enter the template you want to check for\n (Don't include Template:)","Template:");
		if(title === null) return;
		template = title;
		queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + encodeURIComponent(template) + '&geilimit=500&geinamespace=' + encodeURIComponent(namespace) + '&prop=info&format=xml';
		loadXMLDocPassingTemplate(queryURL,getSizeFromAPI,template);
	}
	document.getElementById('wpTextbox1').value = 'Started.';
}