User:Dr pda/generatestats.js

This is an old revision of this page, as edited by Dr pda (talk | contribs) at 06:53, 23 November 2007 (trying new way of getting maximum). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
 //<pre>
 //This script generates a list of the ten shortest and ten longest articles which transclude a template,
 //e.g. {{featured article}}, calculates some statistics and plots a histogram.
 //To use this function add {{subst:js|User:Dr pda/generatestats.js}} to your monobook.js
 //then go to http://en.wikipedia.org/w/index.php?title=User:Dr_pda/generatestatistics&action=edit
 //See the talk page for documentation.
 
 /**
  * Constructor for a simple record pairing a key with a value.
  * Must be invoked with `new`; the two arguments are stored unchanged
  * on the new object as `key` and `value`.
  */
 function keyValuePair(key, value) {
   var pair = this;
   pair.key = key;
   pair.value = value;
 }
 
 
 /**
  * Comparator for Array.prototype.sort that orders records ascending by
  * their `value` property. The subtraction coerces both operands to
  * numbers, so numeric strings compare numerically.
  */
 function sortByValue(a, b) {
   var difference = a.value - b.value;
   return difference;
 }

 /**
  * Chooses an axis increment for a range running from min to max.
  *
  * The increment is the smallest entry of a fixed table of "round" step
  * sizes that is at least (max - min) / 15, i.e. the range is covered by
  * at most 15 steps. If even the largest table entry is too small (or the
  * range is not a number) 5000 is returned.
  *
  * @param min lower end of the range
  * @param max upper end of the range
  * @return the selected increment
  */
 function getBestScale(min,max){
   // Declared locally: assigning without `var` leaked `scales` as a global
   // and throws in strict-mode code.
   var scales = [0.2,0.5,1,2,5,10,20,25,50,100,200,250,500,1000,2000,5000];
   var val = (max-min)/15;
   // Indexed loop rather than for...in, so enumerable additions to
   // Array.prototype are never mistaken for table entries.
   for (var i = 0; i < scales.length; i++) {
     if (scales[i] >= val) return scales[i];
   }
   return 5000;
 }
 
 /**
  * Issues an asynchronous GET request for `url`.
  *
  * On every readystatechange event `handler` is called with the request
  * object and `template`. Nothing happens when the browser exposes
  * neither XMLHttpRequest nor ActiveXObject.
  */
 function loadXMLDocPassingTemplate(url,handler,template)
 {
   var request;

   if (window.XMLHttpRequest) {
     // native XMLHttpRequest object
     request = new XMLHttpRequest();
   } else if (window.ActiveXObject) {
     // IE/Windows ActiveX version
     request = new ActiveXObject("Microsoft.XMLHTTP");
   }

   if (!request) {
     return;
   }

   request.onreadystatechange = function () {
     handler(request, template);
   };
   request.open("GET", url, true);
   request.send("");
 }
 
 /**
  * readystatechange handler for the embeddedin API requests.
  *
  * Once a request has completed successfully, the title and length
  * attributes of every <page> element in the response are appended to the
  * global pagesList. If the response contains an <embeddedin> element its
  * geicontinue attribute is used to request the next batch, with this
  * function as the handler again. Otherwise the collected list is sorted
  * and a report (ten longest, ten shortest, statistics, histogram in
  * <timeline> markup) is written to the wpTextbox1 element, after which
  * the wpPreview element is clicked.
  *
  * Reads the globals pagesList, index and queryURL, which
  * generateStatistics() sets up before the first request.
  *
  * @param req      the XMLHttpRequest whose state changed
  * @param template template title, passed on to follow-up requests
  */
 function getTemplateList(req,template) {
   // Every increment getBestScale can return is a multiple of 0.1, so
   // rounding to one decimal strips floating-point noise such as
   // 0.6000000000000001 from axis limits and bar labels.
   function roundToTenth(x) {
     return Math.round(x*10)/10;
   }

   // only if req shows "loaded"
   if (req.readyState !== 4) return;

   // only if "OK"
   if (req.status !== 200) {
     alert("There was a problem retrieving the XML data:\n" +
         req.statusText);
     return;
   }

   var i;
   var response = req.responseXML.documentElement;
   var pages = response.getElementsByTagName('page');
   for (i = 0; i < pages.length; i++) {
     pagesList[index++] = new keyValuePair(pages[i].getAttribute('title'),pages[i].getAttribute('length'));
   }

   //Check for more pages
   var embeddedin = response.getElementsByTagName('embeddedin');
   if (embeddedin.length > 0) {
     var geicontinue = embeddedin[0].getAttribute('geicontinue');
     document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';
     // The continuation token is encoded so that characters such as '&'
     // or '|' cannot corrupt the query string.
     loadXMLDocPassingTemplate(queryURL+'&geicontinue='+encodeURIComponent(geicontinue),getTemplateList,template);
     return;
   }

   //Last page retrieved: start processing
   var count = pagesList.length;
   if (count === 0) {
     // Nothing to rank or plot; say so instead of stalling silently.
     document.getElementById('wpTextbox1').value = 'No articles transcluding ' + template + ' were found.';
     return;
   }

   pagesList.sort(sortByValue);

   //Get top ten and bottom ten (fewer when fewer articles exist)
   var listLength = Math.min(10, count);
   var bottomTen = '===Ten shortest articles===\n';
   for (i = 0; i < listLength; i++) {
     bottomTen += ('# ' + pagesList[i].key + ' (' + Math.round(pagesList[i].value/1000) + ' kB)\n');
   }

   pagesList.reverse();
   var topTen = '===Ten longest articles===\n';
   for (i = 0; i < listLength; i++) {
     topTen += ('# ' + pagesList[i].key + ' (' + Math.round(pagesList[i].value/1000) + ' kB)\n');
   }

   //Get Range (the list is now in descending order)
   var max = Math.ceil(pagesList[0].value/1000);
   var min = Math.floor(pagesList[count-1].value/1000);
   var xScale = getBestScale(min,max);
   max = roundToTenth(Math.ceil(max/xScale)*xScale);
   min = roundToTenth(Math.floor(min/xScale)*xScale);
   // Rounded so that a fractional scale cannot yield a non-integer array
   // length; at least one bin so a zero-width range still gets a bar.
   var numBins = Math.max(1, Math.round((max - min)/xScale));

   //Calculate statistics
   var sum = 0.0;
   var bins = new Array(numBins);
   for (i = 0; i < numBins; i++) {
     bins[i] = 0;
   }
   for (i = 0; i < count; i++) {
     sum += pagesList[i].value*1.0;
     // Bin width is xScale. The index is clamped so that an article
     // lying exactly on the upper (or, through rounding, lower) edge of
     // the range lands in the outermost bin rather than outside the array.
     var bin = Math.floor((pagesList[i].value/1000-min)/xScale);
     if (bin > numBins-1) bin = numBins-1;
     if (bin < 0) bin = 0;
     bins[bin]++;
   }

   var mean = Math.round(sum/count)/1000;
   // Median: middle element for an odd count, mean of the two middle
   // elements for an even count.
   var middle = Math.floor(count/2);
   var median;
   if (count % 2 === 1) {
     median = pagesList[middle].value/1000;
   } else {
     median = (pagesList[middle-1].value/1000 + pagesList[middle].value/1000)/2;
   }

   var statistics = '===Statistics===\n*Number of articles: '+count+'\n*Mean: '+mean+' kB\n*Median: '+median+' kB\n';

   //Calculate best vertical scale from the fullest bin
   var yMax = 0;
   for (i = 0; i < numBins; i++) {
     if (bins[i] > yMax) yMax = bins[i];
   }
   var yScale = getBestScale(0,yMax);
   yMax = roundToTenth(Math.ceil(yMax/yScale)*yScale);
   var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + ' start:0';
   // A minor grid at half the major increment, only when that half is a whole number
   if (Math.floor(yScale/2) === yScale/2) verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale/2 + ' start:0';

   //Draw chart
   // NOTE(review): lightgrey and darkgrey are both defined as gray(0.8) - confirm whether darkgrey was meant to be darker.
   var chart = '===Chart===\n<timeline>\nColors=\n  id:lightgrey  value:gray(0.8)\n  id:darkgrey  value:gray(0.8)\n  id:white value:rgb(1,1,1)\n  id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize  = width:auto height:300 barincrement:25\nPlotArea   = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod     = from:0 till:' + yMax +'\nTimeAxis   = orientation:vertical\nAlignBars  = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n  color:steel width:20 align:left\n';
   for (i = 0; i < numBins; i++) {
     chart += '  bar:'+roundToTenth(min+i*xScale)+' from:0 till:'+bins[i]+'\n';
   }
   //Add axis label, attached to the bar two fifths of the way along the axis
   chart += '  bar:'+roundToTenth(min + Math.floor(2*numBins/5)*xScale)+' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';

   document.getElementById('wpTextbox1').value = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
   document.getElementById('wpPreview').click();
 }
 
 /**
  * Entry point: asks for a template name and starts collecting the
  * articles that transclude it.
  *
  * The first character of the entered name is upper-cased and "Template:"
  * is prepended. The shared state read by getTemplateList() - queryURL,
  * pagesList and index - is (re)initialised as globals before the first
  * request is sent. Does nothing when the prompt is cancelled or left
  * empty.
  */
 function generateStatistics(){

   var name = prompt("Enter the template you want to check for\n (Don't include Template:)","");
   // prompt() yields null when cancelled; an empty name cannot be queried.
   if (name === null || name === "") return;

   var template = "Template:" + name.toUpperCase().charAt(0) + name.slice(1);

   // Set explicitly on window: getTemplateList() reads these as globals.
   // The title is encoded so that spaces, '&' etc. cannot break the query string.
   window.queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + encodeURIComponent(template) + '&geilimit=500&geinamespace=0&prop=info&format=xml';
   window.pagesList = [];
   window.index = 0;
   loadXMLDocPassingTemplate(window.queryURL,getTemplateList,template);

 }
 
 addOnloadHook(function () {
   if(document.___location.href.indexOf('User:Dr_pda/generatestats&action=edit') != -1){
     generateStatistics();
   }
 });
 
 //</pre>