User:Dr pda/generatestats.js: Difference between revisions

Content deleted Content added
trying better choice for scales
Adding option to specify namespace for basic template transclusion query
 
(33 intermediate revisions by the same user not shown)
Line 3:
//e.g. {{featured article}}, calculates some statistics and plots a histogram.
//To use this function add {{subst:js|User:Dr pda/generatestats.js}} to your monobook.js
//then go to http://en.wikipedia.org/w/index.php?title=User:Dr_pda/generatestats&action=edit
//See the talk page for documentation.
Line 15:
return a.value - b.value
}
 
function getBestScale(min,max){
scales = new Array(0.2,0.5,1,2,5,10,20,25,50,100,200,250,500,1000,2000,5000);
Line 42:
}
function getTemplateListgetSizeFromAPI(req,template) {
// only if req shows "loaded"
if (req.readyState == 4) {
Line 48:
if (req.status == 200) {
// ...processing statements go here...
if(useTalkCategory || useTemplateCategory) jobsLeft--;
var response = req.responseXML.documentElement;
var response = req.responseXML.documentElement;
var pages = response.getElementsByTagName('page');
var pages = response.getElementsByTagName('page');
 
if(pages.length > 0){
if for(var i=0;i<pages.length; > 0i++){
pagesList[index++] = new keyValuePair(pages[i].getAttribute('title'),pages[i].getAttribute('length'));
}
document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';
 
//Check for more pages
var embeddedin = response.getElementsByTagName('embeddedin');
if(embeddedin.length > 0){
var geicontinue = embeddedin[0].getAttribute('geicontinue');
if(useTalkCategory || useTemplateCategory) jobsLeft++;
loadXMLDocPassingTemplate(queryURL+'&geicontinue='+geicontinue,getSizeFromAPI,template);
}
//If last page retrieved then start processing
else if(jobsLeft == 0){
//If using wiki text size
if(document.___location.href.indexOf('prosesize') == -1){
sortAndMakeChart();
}
//If using readable prose size (WARNING:Will load every page which transcludes template. Could be thousands of pages!!)
else{
for(var x in pagesList){
var titleURL = encodeURIComponent(pagesList[x].key.replace(/ /g,'_'));
loadXMLDocPassingTemplate('/w/index.php?action=render&title='+titleURL,getProseSizeFromPage,pagesList[x].key);
}
}
}
}
} else {
alert("There was a problem retrieving the XML data:\n" +
req.statusText);
}
}
}
//XMLHttpRequest callback for the talk page category query (talkQueryURL).
//req      - the request object; nothing is done until readyState is 4.
//template - passed through unchanged to every follow-up request.
//Appends the 'subjectid' attribute of each <page> element in the response to the
//global articleList. If the response contains a <categorymembers> element the query
//is repeated with its 'gcmcontinue' value; otherwise the collected ids are sent to
//queryURL in batches, with getSizeFromAPI as the callback, and the global jobsLeft
//is incremented once per batch.
function getArticlePageFromTalkPage(req,template) {
	// only if req shows "loaded"
	if (req.readyState != 4) return;
	// only if "OK"
	if (req.status != 200) {
		alert("There was a problem retrieving the XML data:\n" +
			req.statusText);
		return;
	}
	// ...processing statements go here...
	var response = req.responseXML.documentElement;
	var pages = response.getElementsByTagName('page');
	if(pages.length == 0) return;

	for(var i=0;i<pages.length; i++){
		articleList.push(pages[i].getAttribute('subjectid'));
	}

	//Check for more pages
	var categorymembers = response.getElementsByTagName('categorymembers');
	if(categorymembers.length > 0){
		var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
		//The continue value is encoded so that characters such as '|', '&' or spaces
		//cannot break the query string
		loadXMLDocPassingTemplate(talkQueryURL+'&gcmcontinue='+encodeURIComponent(gcmcontinue),getArticlePageFromTalkPage,template);
		return;
	}

	//All pages retrieved
	//API limited to 50 titles per query
	var batchSize = 50;
	for(var start=0;start<articleList.length;start+=batchSize){
		var pageIds = articleList.slice(start,start+batchSize).join('|');
		jobsLeft++;
		loadXMLDocPassingTemplate(queryURL+pageIds,getSizeFromAPI,template);
	}
}
//XMLHttpRequest callback for the template category query (templateQueryURL).
//req      - the request object; nothing is done until readyState is 4.
//template - passed through unchanged to every follow-up request.
//Appends the URL-encoded 'title' attribute of each <page> element in the response to
//the global articleList. If the response contains a <categorymembers> element the
//query is repeated with its 'gcmcontinue' value; otherwise one request per collected
//title is sent to queryURL, with getSizeFromAPI as the callback, and the global
//jobsLeft is incremented once per request.
function getPagesFromTemplateCategory(req,template) {
	// only if req shows "loaded"
	if (req.readyState != 4) return;
	// only if "OK"
	if (req.status != 200) {
		alert("There was a problem retrieving the XML data:\n" +
			req.statusText);
		return;
	}
	// ...processing statements go here...
	var response = req.responseXML.documentElement;
	var pages = response.getElementsByTagName('page');
	if(pages.length == 0) return;

	for(var i=0;i<pages.length; i++){
		articleList.push(encodeURIComponent(pages[i].getAttribute('title')));
	}

	//Check for more pages
	var categorymembers = response.getElementsByTagName('categorymembers');
	if(categorymembers.length > 0){
		var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
		//The continue value is encoded so that characters such as '|', '&' or spaces
		//cannot break the query string
		loadXMLDocPassingTemplate(templateQueryURL+'&gcmcontinue='+encodeURIComponent(gcmcontinue),getPagesFromTemplateCategory,template);
		return;
	}

	//All pages retrieved
	//API embeddedin query can only take one title
	for(var j=0;j<articleList.length;j++){
		jobsLeft++;
		loadXMLDocPassingTemplate(queryURL+articleList[j],getSizeFromAPI,template);
	}
}
function getProseSizeFromPage(req,title) {
var mean = Math.round(sum/pagesList.length)/1000;
// only if req shows "loaded"
var median = pagesList[Math.floor(pagesList.length/2)+1].value/1000;
if (req.readyState == 4) {
// only if "OK"
if (req.status == 200) {
// ...processing statements go here...
var response = req.responseText;
var start = response.indexOf('<p>',-1);
var statistics = '===Statistics===\n*Number of articles: '+pagesList.length+'\n*Mean: '+mean+' kB\n*Median: '+median+' kB\n';
var stop = 0;
var proseSize = 0;
while(start > -1){
//Calculate best vertical scale
var yMaxstop = Math.maxresponse.applyindexOf(Math'</p>',binsstart);
var yScalepara = getBestScaleresponse.substring(0start+3,yMaxstop);
yMaxpara = Mathpara.ceilreplace(yMax/yScale\[\d{1,3}\]/g,'')*yScale;
para = para.replace(/citation needed/g,'');
var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + '10 start:0';
para = para.replace(/(<([^>]+)>)/ig,'');
if(Math.floor(yScale/2) == yScale/2) verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale/2 + ' start:0'
proseSize += para.length;
start = response.indexOf('<p>',stop);
//Draw chart
var chart = '===Chart===\n<timeline>\nColors=\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:300 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax +'\nTimeAxis = orientation:vertical\nAlignBars = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
for(var i=0;i<numBins;i++){
chart += ' bar:'+(min+i*10)+' from:0 till:'+bins[i]+'\n';
}
//Add axis label
chart += ' bar:'+(min + Math.floor(2*numBins/5)*10)+' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';
document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
document.getElementById('wpPreview').click();
}
}
proseList[proseIndex++] = new keyValuePair(title,proseSize);
document.getElementById('wpTextbox1').value = 'Retrieved prose size for ' + proseIndex + ' out of ' + index + ' articles.\n To abort click the back button in your browser.';
//If last page retrieved then start processing
if(proseIndex == index){
pagesList = proseList;
sortAndMakeChart();
}
} else {
alert("There was a problem retrieving the XML data:\n" +
Line 128 ⟶ 204:
}
function generateStatistics(){
//Sorts the global pagesList (objects with .key = title and .value = size) by size and
//writes a report into the edit box 'wpTextbox1': the ten longest and ten shortest
//articles, the number of articles, mean and median size in kB, and an EasyTimeline
//histogram of the sizes. If the page URL contains '&list' the full list of articles
//by size is appended. Finally clicks the 'wpPreview' button.
//Takes no arguments and returns nothing; pagesList is left sorted largest first.
function sortAndMakeChart(){
	var textbox = document.getElementById('wpTextbox1');
	var count = pagesList.length;
	//Nothing to sort or plot
	if(count == 0){
		textbox.value = 'No articles found.';
		return;
	}
	var showList = document.URL.indexOf('&list') != -1;
	var bytesPerKB = 1024;
	var i;

	//Size of a page in kB (values may be attribute strings, so convert explicitly)
	function sizeInKB(page){
		return Number(page.value)/bytesPerKB;
	}
	//One numbered wiki list entry for a page
	function listEntry(page){
		return '# [[' + page.key + ']] (' + Math.round(sizeInKB(page)) + ' kB)\n';
	}
	//Removes floating point noise from bar labels when the scale is fractional
	function tidy(n){
		return Number(n.toFixed(3));
	}

	pagesList.sort(sortByValue);
	//Get top ten and bottom ten (or fewer if there are not ten pages)
	var shown = Math.min(10,count);
	var bottomTen = '===Ten shortest articles===\n';
	for(i=0;i<shown;i++){
		bottomTen += listEntry(pagesList[i]);
	}
	pagesList.reverse();
	var topTen = '===Ten longest articles===\n';
	for(i=0;i<shown;i++){
		topTen += listEntry(pagesList[i]);
	}

	var list = '===List of articles by size===\n';
	if(showList){
		for(i=0;i<count;i++){
			list += listEntry(pagesList[i]);
		}
	}

	//Get Range
	var max = Math.ceil(sizeInKB(pagesList[0]));
	var min = Math.floor(sizeInKB(pagesList[count-1]));
	var xScale = getBestScale(min,max);
	max = Math.ceil(max/xScale)*xScale;
	min = Math.floor(min/xScale)*xScale;
	//Rounded because a fractional scale can give e.g. 14.999..., and at least one
	//bin is needed when min and max coincide
	var numBins = Math.max(1,Math.round((max - min)/xScale));

	//Calculate statistics
	var sum = 0.0;
	var bins = [];
	for(i=0;i<numBins;i++){
		bins[i]=0;
	}
	for(i=0;i<count;i++){
		sum += Number(pagesList[i].value);
		var bin = Math.floor((sizeInKB(pagesList[i])-min)/xScale);
		//A size equal to the upper edge of the range belongs in the last bin
		bins[Math.min(Math.max(bin,0),numBins-1)]++;
	}
	var mean = (sum/(count*bytesPerKB)).toFixed(3);
	//Median: middle element, or the average of the two middle elements
	var mid = Math.floor(count/2);
	var medianBytes = (count%2 == 1) ? Number(pagesList[mid].value) : (Number(pagesList[mid-1].value) + Number(pagesList[mid].value))/2;
	var median = (medianBytes/bytesPerKB).toFixed(3);
	var statistics = '===Statistics===\n*Number of articles: '+count+'\n*Mean: '+mean+' kB\n*Median: '+median+' kB\n';

	//Calculate best vertical scale
	var yMax = Math.max.apply(Math,bins);
	var yScale = getBestScale(0,yMax);
	yScale = Math.max(1,yScale);
	yMax = Math.ceil(yMax/yScale)*yScale;
	var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + ' start:0';
	if(Math.floor(yScale/2) == yScale/2) verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale/2 + ' start:0';

	//Draw chart
	var chart = '===Chart===\n<timeline>\nColors=\n id:lightgrey value:gray(0.8)\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:303 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax +'\nTimeAxis = orientation:vertical\nAlignBars = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
	for(i=0;i<numBins;i++){
		chart += ' bar:'+tidy(min+i*xScale)+' from:0 till:'+bins[i]+'\n';
	}
	//Add axis label
	chart += ' bar:'+tidy(min + Math.floor(2*numBins/5)*xScale)+' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';

	var report = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
	if(showList){
		report += '\n' + list;
	}
	textbox.value = report;
	document.getElementById('wpPreview').click();
}
//Entry point. Resets the global state shared with the request callbacks, reads the
//mode from the page URL, asks the user what to look up and sends the first API query:
// - URL contains 'usetalkcategory': asks for a talk page category and queries its
//   members with getArticlePageFromTalkPage as the callback
// - URL contains 'usetemplatecategory': asks for a template category and queries its
//   members with getPagesFromTemplateCategory as the callback
// - otherwise: asks for a template and queries the pages embedding it with
//   getSizeFromAPI as the callback
//If the URL contains 'specifynamespace' the user is first asked for a namespace number,
//which is used in the basic template query only.
//Does nothing further if the name prompt is cancelled or left blank.
//Takes no arguments and returns nothing.
function generateStatistics(){
	//Reset the global state shared with the request callbacks
	pagesList = new Array();
	index = 0;
	proseList = new Array();
	proseIndex = 0;
	articleList = new Array();
	template ='';
	queryURL ='';
	talkQueryURL ='';
	templateQueryURL ='';
	jobsLeft = 0;
	namespace = '0';

	var pageURL = document.URL;
	useTalkCategory = pageURL.indexOf('usetalkcategory') != -1;
	useTemplateCategory = pageURL.indexOf('usetemplatecategory') != -1;
	specifyNamespace = pageURL.indexOf('specifynamespace') != -1;

	//Prompts for a page name and returns it with the prefix added and the first letter
	//capitalised, or null if the prompt was cancelled or left blank
	function askForTitle(message,prefix){
		var answer = prompt(message,"");
		if(answer == null) return null;
		answer = answer.replace(/^\s+|\s+$/g,'');
		if(answer == '') return null;
		return prefix + answer.charAt(0).toUpperCase() + answer.substr(1);
	}

	if(specifyNamespace){
		var chosen = prompt("Enter the number of the namespace the pages are in\n (0=article, 2=User, 4=Wikipedia etc)","");
		//Keep the default namespace if the prompt was cancelled or left blank
		if(chosen != null && chosen.replace(/\s+/g,'') != ''){
			namespace = chosen.replace(/\s+/g,'');
		}
	}

	var title;
	if(useTalkCategory){
		title = askForTitle("Enter the talk page category you want to check for\n (Don't include Category:)","Category:");
		if(title == null) return;
		template = title;
		//The title is encoded so that characters such as '&' or '+' cannot break the query
		talkQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + encodeURIComponent(template) + '&gcmlimit=500&gcmnamespace=1&prop=info&inprop=subjectid&format=xml';
		queryURL = '/w/api.php?action=query&prop=info&format=xml&pageids=';
		loadXMLDocPassingTemplate(talkQueryURL,getArticlePageFromTalkPage,template);
	}
	else if(useTemplateCategory){
		title = askForTitle("Enter the template category you want to check\n (Don't include Category:)","Category:");
		if(title == null) return;
		template = title;
		templateQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + encodeURIComponent(template) + '&gcmlimit=500&gcmnamespace=10&prop=info&format=xml';
		queryURL = '/w/api.php?action=query&generator=embeddedin&geilimit=500&geinamespace=0&prop=info&format=xml&geititle=';
		loadXMLDocPassingTemplate(templateQueryURL,getPagesFromTemplateCategory,template);
	}
	else{
		title = askForTitle("Enter the template you want to check for\n (Don't include Template:)","Template:");
		if(title == null) return;
		template = title;
		queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + encodeURIComponent(template) + '&geilimit=500&geinamespace=' + encodeURIComponent(namespace) + '&prop=info&format=xml';
		loadXMLDocPassingTemplate(queryURL,getSizeFromAPI,template);
	}
	document.getElementById('wpTextbox1').value = 'Started.';
}