User:Dr pda/generatestats.js: Difference between revisions

Content deleted Content added
trying new way of getting maximum
Adding option to specify namespace for basic template transclusion query
 
(29 intermediate revisions by the same user not shown)
Line 3:
//e.g. {{featured article}}, calculates some statistics and plots a histogram.
//To use this function add {{subst:js|User:Dr pda/generatestats.js}} to your monobook.js
//then go to http://en.wikipedia.org/w/index.php?title=User:Dr_pda/generatestats&action=edit
//See the talk page for documentation.
Line 15:
return a.value - b.value
}
 
function getBestScale(min,max){
scales = new Array(0.2,0.5,1,2,5,10,20,25,50,100,200,250,500,1000,2000,5000);
Line 42:
}
function getTemplateListgetSizeFromAPI(req,template) {
// only if req shows "loaded"
if (req.readyState == 4) {
Line 48:
if (req.status == 200) {
// ...processing statements go here...
if(useTalkCategory || useTemplateCategory) jobsLeft--;
var response = req.responseXML.documentElement;
var response = req.responseXML.documentElement;
var pages = response.getElementsByTagName('page');
var pages = response.getElementsByTagName('page');
 
if(pages.length > 0){
if for(var i=0;i<pages.length; > 0i++){
pagesList[index++] = new keyValuePair(pages[i].getAttribute('title'),pages[i].getAttribute('length'));
}
document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';
 
//Check for more pages
var embeddedin = response.getElementsByTagName('embeddedin');
if(embeddedin.length > 0){
var geicontinue = embeddedin[0].getAttribute('geicontinue');
if(useTalkCategory || useTemplateCategory) jobsLeft++;
loadXMLDocPassingTemplate(queryURL+'&geicontinue='+geicontinue,getSizeFromAPI,template);
}
//If last page retrieved then start processing
else if(jobsLeft == 0){
//If using wiki text size
if(document.___location.href.indexOf('prosesize') == -1){
sortAndMakeChart();
}
//If using readable prose size (WARNING:Will load every page which transcludes template. Could be thousands of pages!!)
else{
for(var x in pagesList){
var titleURL = encodeURIComponent(pagesList[x].key.replace(/ /g,'_'));
loadXMLDocPassingTemplate('/w/index.php?action=render&title='+titleURL,getProseSizeFromPage,pagesList[x].key);
}
}
}
}
} else {
alert("There was a problem retrieving the XML data:\n" +
req.statusText);
}
}
}
// XMLHttpRequest state-change callback for the talk-page category query.
// Collects the 'subjectid' attribute of every <page> element into articleList,
// follows the category continuation if there is one, and finally requests the
// page info for the collected ids in batches via getSizeFromAPI.
//   req      - the XMLHttpRequest whose state changed
//   template - category title handed through unchanged to follow-up requests
function getArticlePageFromTalkPage(req,template) {
// only if req shows "loaded"
if (req.readyState != 4) return;
// only if "OK"
if (req.status != 200) {
alert("There was a problem retrieving the XML data:\n" +
req.statusText);
return;
}

var response = req.responseXML.documentElement;
var pages = response.getElementsByTagName('page');
for(var i=0;i<pages.length;i++){
var subjectId = pages[i].getAttribute('subjectid');
// getAttribute yields null when the element carries no subjectid; skip
// those so that the text 'null' never ends up in the pageids list.
if(subjectId) articleList.push(subjectId);
}

var categorymembers = response.getElementsByTagName('categorymembers');
if(categorymembers.length > 0){
var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
// Escaped so that '&', '|' and similar characters survive in the query string.
loadXMLDocPassingTemplate(talkQueryURL+'&gcmcontinue='+encodeURIComponent(gcmcontinue),getArticlePageFromTalkPage,template);
return;
}

//All pages retrieved
if(articleList.length == 0){
document.getElementById('wpTextbox1').value = 'No articles found.';
return;
}
//API limited to 50 titles per query
var batchSize = 50;
for(var start=0;start<articleList.length;start+=batchSize){
// One pending job per batch; getSizeFromAPI counts them down again.
jobsLeft++;
loadXMLDocPassingTemplate(queryURL+articleList.slice(start,start+batchSize).join('|'),getSizeFromAPI,template);
}
}
// XMLHttpRequest state-change callback for the template category query.
// Collects the URL-encoded title of every <page> element into articleList,
// follows the category continuation if there is one, and finally starts one
// size query per collected title via getSizeFromAPI.
//   req      - the XMLHttpRequest whose state changed
//   template - category title handed through unchanged to follow-up requests
function getPagesFromTemplateCategory(req,template) {
// only if req shows "loaded"
if (req.readyState != 4) return;
// only if "OK"
if (req.status != 200) {
alert("There was a problem retrieving the XML data:\n" +
req.statusText);
return;
}

var response = req.responseXML.documentElement;
var pages = response.getElementsByTagName('page');
for(var i=0;i<pages.length;i++){
articleList.push(encodeURIComponent(pages[i].getAttribute('title')));
}

var categorymembers = response.getElementsByTagName('categorymembers');
if(categorymembers.length > 0){
var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
// Escaped so that '&', '|' and similar characters survive in the query string.
loadXMLDocPassingTemplate(templateQueryURL+'&gcmcontinue='+encodeURIComponent(gcmcontinue),getPagesFromTemplateCategory,template);
return;
}

//All pages retrieved
if(articleList.length == 0){
document.getElementById('wpTextbox1').value = 'No articles found.';
return;
}
// Index loop rather than for..in, so properties added to Array.prototype
// by other scripts are never treated as titles.
for(var j=0;j<articleList.length;j++){
//API embeddedin query can only take one title
jobsLeft++;
loadXMLDocPassingTemplate(queryURL+articleList[j],getSizeFromAPI,template);
}
}
function getProseSizeFromPage(req,title) {
//Draw chart
// only if req shows "loaded"
var chart = '===Chart===\n<timeline>\nColors=\n id:lightgrey value:gray(0.8)\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:300 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax +'\nTimeAxis = orientation:vertical\nAlignBars = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
if (req.readyState == 4) {
for(var i=0;i<numBins;i++){
// only if "OK"
chart += ' bar:'+(min+i*xScale)+' from:0 till:'+bins[i]+'\n';
if (req.status == 200) {
}
// ...processing statements go here...
//Add axis label
var response = req.responseText;
chart += ' bar:'+(min + Math.floor(2*numBins/5)*xScale)+' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';
var start = response.indexOf('<p>',-1);
document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
var stop = 0;
document.getElementById('wpPreview').click();
var proseSize = 0;
}
while(start > -1){
stop = response.indexOf('</p>',start);
var para = response.substring(start+3,stop);
para = para.replace(/\[\d{1,3}\]/g,'');
para = para.replace(/citation needed/g,'');
para = para.replace(/(<([^>]+)>)/ig,'');
proseSize += para.length;
start = response.indexOf('<p>',stop);
}
proseList[proseIndex++] = new keyValuePair(title,proseSize);
document.getElementById('wpTextbox1').value = 'Retrieved prose size for ' + proseIndex + ' out of ' + index + ' articles.\n To abort click the back button in your browser.';
//If last page retrieved then start processing
if(proseIndex == index){
pagesList = proseList;
sortAndMakeChart();
}
} else {
alert("There was a problem retrieving the XML data:\n" +
Line 134 ⟶ 204:
}
function generateStatistics(){
function sortAndMakeChart(){
template=prompt("Enter the template you want to check for\n (Don't include Template:)","");
pagesList.sort(sortByValue);
template = "Template:"+template.toUpperCase().substr(0,1)+template.substr(1);
//Get top ten and bottom ten
queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + template + '&geilimit=500&geinamespace=0&prop=info&format=xml';
var bottomTen = '===Ten shortest articles===\n';
for(var i=0;i<10;i++){
bottomTen += ('# [[' + pagesList[i].key + ']] (' + Math.round(pagesList[i].value/1024) + ' kB)\n');
}
pagesList.reverse();
var topTen = '===Ten longest articles===\n';
for(var i=0;i<10;i++){
topTen += ('# [[' + pagesList[i].key + ']] (' + Math.round(pagesList[i].value/1024) + ' kB)\n');
}
 
var list = '===List of articles by size===\n';
if(document.___location.href.indexOf('&list') != -1){
for(var i=0;i<pagesList.length;i++){
list += ('# [[' + pagesList[i].key + ']] (' + Math.round(pagesList[i].value/1024) + ' kB)\n');
}
}
//Get Range
var max = Math.ceil(pagesList[0].value/1024);
var min = Math.floor(pagesList[pagesList.length-1].value/1024);
var xScale = getBestScale(min,max);
max = Math.ceil(max/xScale)*xScale;
min = Math.floor(min/xScale)*xScale;
var numBins = (max - min)/xScale;
//Calculate statistics
var sum = 0.0;
var bins = new Array(numBins);
for(var i=0;i<numBins;i++){
bins[i]=0;
}
for(var i=0;i<pagesList.length;i++){
sum += pagesList[i].value*1.0;
bins[Math.floor((pagesList[i].value/1024-min)/(xScale*1.0))]++;
}
var mean = (sum/(pagesList.length*1024)).toFixed(3);
var median = (pagesList[Math.floor(pagesList.length/2)+1].value/1024).toFixed(3);
var statistics = '===Statistics===\n*Number of articles: '+pagesList.length+'\n*Mean: '+mean+' kB\n*Median: '+median+' kB\n';
//Calculate best vertical scale
var yMax = Math.max.apply(Math,bins);
var yScale = getBestScale(0,yMax);
yScale = Math.max(1,yScale);
yMax = Math.ceil(yMax/yScale)*yScale;
var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + ' start:0';
if(Math.floor(yScale/2) == yScale/2) verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale/2 + ' start:0'
//Draw chart
var chart = '===Chart===\n<timeline>\nColors=\n id:lightgrey value:gray(0.8)\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:303 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax +'\nTimeAxis = orientation:vertical\nAlignBars = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
for(var i=0;i<numBins;i++){
chart += ' bar:'+(min+i*xScale)+' from:0 till:'+bins[i]+'\n';
}
//Add axis label
chart += ' bar:'+(min + Math.floor(2*numBins/5)*xScale)+' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';
 
if(document.___location.href.indexOf('&list') != -1){
document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart + '\n' + list;
}
else{
document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
}
document.getElementById('wpPreview').click();
}
function generateStatistics(){
pagesList = new Array();
index = 0;
proseList = new Array();
loadXMLDocPassingTemplate(queryURL,getTemplateList,template);
proseIndex = 0;
articleList = new Array();
template ='';
queryURL ='';
talkQueryURL ='';
templateQueryURL ='';
jobsLeft = 0;
namespace = '0';
 
useTalkCategory = (document.___location.href.indexOf('usetalkcategory') != -1) ? true : false;
useTemplateCategory = (document.___location.href.indexOf('usetemplatecategory') != -1) ? true : false;
specifyNamespace = (document.___location.href.indexOf('specifynamespace') != -1) ? true : false;
 
if(specifyNamespace){
namespace=prompt("Enter the number of the namespace the pages are in\n (0=article, 2=User, 4=Wikipedia etc)","");
}
 
if(useTalkCategory){
template=prompt("Enter the talk page category you want to check for\n (Don't include Category:)","");
template = "Category:"+template.toUpperCase().substr(0,1)+template.substr(1);
talkQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + template + '&gcmlimit=500&gcmnamespace=1&prop=info&inprop=subjectid&format=xml';
queryURL = '/w/api.php?action=query&prop=info&format=xml&pageids=';
loadXMLDocPassingTemplate(talkQueryURL,getArticlePageFromTalkPage,template);
}
else if(useTemplateCategory){
template=prompt("Enter the template category you want to check\n (Don't include Category:)","");
template = "Category:"+template.toUpperCase().substr(0,1)+template.substr(1);
templateQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + template + '&gcmlimit=500&gcmnamespace=10&prop=info&format=xml';
queryURL = '/w/api.php?action=query&generator=embeddedin&geilimit=500&geinamespace=0&prop=info&format=xml&geititle=';
loadXMLDocPassingTemplate(templateQueryURL,getPagesFromTemplateCategory,template);
}
else{
template=prompt("Enter the template you want to check for\n (Don't include Template:)","");
template = "Template:"+template.toUpperCase().substr(0,1)+template.substr(1);
queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + template + '&geilimit=500&geinamespace=' + namespace + '&prop=info&format=xml';
loadXMLDocPassingTemplate(queryURL,getSizeFromAPI,template);
}
document.getElementById('wpTextbox1').value = 'Started.';
}