User:BogBot/Source code/Task 03

This is an old revision of this page, as edited by Boghog (talk | contribs) at 20:35, 24 July 2011 (updated to include code to generate the {{drugs.com}} template). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
#!/usr/bin/python

# Bot Script to populate new clinical fields in Drugbox templates in Wikipedia drug articles.
# The new fields are:
# | tradename   =  <!-- comma separated list of tradenames --> 
# | Drugs.com   =  <!--  link to Drugs.com monograph, e.g., "lisinopril" that links to "http://www.drugs.com/monograph/lisinopril.html" -->
# | MedlinePlus =  <!-- MedlinePlus drug accession number, e.g.,  "a692051" that links to "http://www.nlm.nih.gov/medlineplus/druginfo/meds/a692051.html" --> 

"""{{Drugbox
| verifiedrevid = 408577806
| IUPAC_name        = 
| image             = 
| width             = 
| alt               = 
| image2            = 
| width2            = 
| alt2              = 
| imagename         = <!-- else may use drug_name -->
| drug_name         = <!-- else may use imagename -->
| caption           = 

<!--Clinical data-->
| tradename         =  
| Drugs.com         =
| MedlinePlus       =
| licence_EU        = <!-- EMA requires brand name -->
| licence_US        = <!-- FDA may use generic name -->
| DailyMedID        = <!-- preference to licence_US -->
| pregnancy_AU      = <!-- A / B1 / B2 / B3 / C / D / X -->
| pregnancy_US      = <!-- A / B            / C / D / X -->
| pregnancy_category= 
| legal_AU = <!-- S2, S3, S4, S5, S6, S7, S8, S9 or Unscheduled-->
| legal_CA = <!-- OTC, Rx-only, Schedule I, II, III, IV, V, VI, VII, VIII -->
| legal_UK = <!-- GSL, P, POM, CD, CD Lic, CD POM, CD No Reg POM, CD (Benz) POM, CD (Anab) POM or CD Inv POM -->
| legal_US = <!-- OTC / Rx-only / Schedule I, II, III, IV, V -->
| legal_status      = 
| dependency_liability = 
| routes_of_administration = 

<!--Pharmacokinetic data-->
| bioavailability   = 
| protein_bound     = 
| metabolism        = 
| elimination_half-life = 
| excretion         = 

<!--Identifiers-->
| CAS_number        = 
| CAS_supplemental  = 
| ATCvet            = 
| ATC_prefix        = <!-- 'none' if uncategorised -->
| ATC_suffix        = 
| ATC_supplemental  = 
| PubChem           = 
| PubChemSubstance  = 
| IUPHAR_ligand     = 
| DrugBank          = 
| ChemSpiderID      = 
| UNII              =
| KEGG              =
| ChEBI             =
| ChEMBL            =

<!--Chemical data-->
| chemical_formula  = 
| C= | H= | Ag= | As= | Au= | B= | Bi= | Br= | Cl= | Co= | F= | Fe= | Gd= | I=
| K= | Mn= | N= | Na= | O= | P= | Pt= | S= | Sb= | Se= | Sr= | Tc= | Zn= | charge=
| molecular_weight  = 
| smiles            = 
| StdInChI          =
| StdInChI_comment  =
| StdInChIKey       =
| synonyms          = 
| density           = 
| melting_point     = 
| melting_high      = 
| melting_notes     = 
| boiling_point     = 
| boiling_notes     = 
| solubility        = 
| specific_rotation = 
| sec_combustion    = 

<!--Combo data-->
| type              = combo
| drug_name         = 
| component1        = <!-- Drugname, automatically linked -->
| class1            = <!-- Group, manual link using [[..|..]] -->
| component2        = <!-- Drugname, automatically linked -->
| class2            = <!-- Group, manual link using [[..|..]] -->
| component3        = <!-- Drugname, automatically linked -->
| class3            = <!-- Group, manual link using [[..|..]] -->
| component4        = <!-- Drugname, automatically linked -->
| class4            = <!-- Group, manual link using [[..|..]] -->

<!--Monoclonal antibody data-->
| type              = mab
| image             = 
| width             = 
| alt               = 
| image2            = 
| width2            = 
| alt2              = 
| imagename         = <!-- else may use drug_name -->
| drug_name         = <!-- else may use imagename -->
| mab_type          = <!-- mab, Fab, F(ab')2, Fab', scFv, di-scFv, 3funct, clFab, BiTE -->
| source            = <!-- a, e, i, o, u, xi/a, zu/a, xizu/a, axo, ... -->
| target            = <!-- antigen -->

<!--Vacine data-->
| type              = vaccine
| image             = 
| alt               = 
| width             = 
| image2            = 
| alt2              = 
| width2            = 
| imagename         = <!-- else may use drug_name -->
| drug_name         = <!-- else may use imagename -->
| target            = <!-- the antigen/bacteria/toxin/virus to protect against -->
| vaccine_type      = <!-- killed/attenuated/live/toxoid/protein subunit/subunit/conjugate/recombinant/DNA -->

}}"""

import re
import string
import wikipedia
from collections import defaultdict
import urllib
import csv
import string

# Account name of the bot; matched against {{bots|deny=...}} lists below.
user =  "BogBot"

# compiled regular expression

# Bot-exclusion templates.  The deny list is matched with [^{}] instead of "."
# so that a match cannot run past the closing braces of the {{bots}} template
# into unrelated text on the same line, and the user name is escaped so that it
# is always matched literally.  Template names are accepted with either case of
# the first letter ({{nobots}} / {{Nobots}}).
regexp_ab = re.compile(
    r'\{\{([Nn]obots|[Bb]ots\|(allow=none|deny=[^{}]*?' + re.escape(user)
    + r'[^{}]*?|optout=all|deny=all))\}\}')

# The Drugbox template itself.  PARAMS may contain templates nested up to two
# levels deep (e.g. {{drugs.com|...}}), but never an unbalanced brace, so the
# match ends at the closing braces of the Drugbox rather than at the last
# "}}" on the page.  A Drugbox containing a single "{" or "}" does not match.
regexp_drug_infobox = re.compile(
    r"\{\{\s*(Drugbox|drugbox)\s*"
    r"(?P<PARAMS>(?:[^{}]|\{\{(?:[^{}]|\{\{[^{}]*\}\})*\}\})+)"
    r"\s*\}\}\s*",
    re.DOTALL)

# One "| name = value" line.  Any amount of padding is allowed around the name
# and the "=" sign (the documented template aligns its "=" signs with spaces),
# the name may not contain "=" or "|", and the value is captured without
# surrounding whitespace.  Lines whose value is empty do not match.
regexp_param = re.compile(r"^\s*\|\s*(?P<PARAM>[^\s=|]+)\s*=\s*(?P<VALUE>\S.*?)\s*$")
  
# Included for bot exclusion compliance (see http://en.wikipedia.org/wiki/Template:Bots)

def Allowbots(text):
    """Return False if regexp_ab finds a bot-exclusion template in text, else True."""
    exclusion = regexp_ab.search(text)
    return exclusion is None

# articles = open('/Users/BogBot/progs/pywikipedia/drugbox/drugbox_titles.txt', 'r')

# Maps the first CSV column (drug name) to the list of remaining columns.
drugbank_data = {}

# CSV columns; the number is the index into drugbank_data[name]:
#   key  Name
#   0    Trade_Names
#   1    Drug_Type
#   2    MedlinePlus
#   3    Drugs.com_link
#   4    KEGG_Drug_ID
#   5    KEGG_Compound_ID
#   6    ChemSpider_ID
#   7    PubChem_Compound_ID

# drugs.com root links:

roots = [("monograph","http://www.drugs.com/monograph/"), ("CDI","http://www.drugs.com/cdi/"), ("CONS","http://www.drugs.com/cons/"), ("MTM","http://www.drugs.com/mtm/"), ("parent","http://www.drugs.com/")]

# Read the whole table up front; the "with" block closes the file afterwards.
with open('/Users/BogBot/progs/pywikipedia/drugbox/drug_links_agumented.csv', 'rU') as drug_file:
    drug_data = csv.reader(drug_file, dialect='excel')
    for row in drug_data:
        if not row:
            # A blank line yields an empty row, which has no name column.
            continue
        drugbank_data[row[0]] = row[1:]


# Pages to process.  Defined at module level (not inside the CSV loop) so that
# it exists even when the CSV file contains no rows.
# articles = []

articles = ["User:Boghog/Sandbox2"]
#  articles = ["Template:Drugbox/Lisinopril"]

# main loop

# ---------------------------------------------------------------------------
# Layout of the regenerated Drugbox.  Fields are written in the order listed
# here, and only when the parsed infobox (or the CSV data) supplies a value.
# ---------------------------------------------------------------------------

_LEADING_FIELDS = (
    "verifiedrevid", "IUPAC_name", "image", "width", "alt", "image2",
    "width2", "alt2", "imagename", "drug_name", "caption",
)

# Extra section written right after the leading fields, selected by "type".
# NOTE(review): "Vacine" is misspelt, but the heading is kept byte-for-byte
# because it is text written into articles; correct it deliberately if wanted.
_TYPE_SECTIONS = {
    "combo": ("\n<!--Combo data-->\n",
              ("type", "component1", "class1", "component2", "class2",
               "component3", "class3", "component4", "class4")),
    "mab": ("\n<!--Monoclonal antibody data-->\n",
            ("type", "mab_type", "source", "target")),
    "vaccine": ("\n<!--Vacine data-->\n",
                ("type", "target", "vaccine_type")),
}

_CLINICAL_FIELDS = (
    "tradename", "Drugs.com", "MedlinePlus", "licence_EU", "licence_US",
    "DailyMedID", "pregnancy_AU", "pregnancy_US", "pregnancy_category",
    "legal_AU", "legal_CA", "legal_UK", "legal_US", "legal_status",
    "dependency_liability", "routes_of_administration",
)

_PHARMACOKINETIC_FIELDS = (
    "bioavailability", "protein_bound", "metabolism",
    "elimination_half-life", "excretion",
)

_IDENTIFIER_FIELDS = (
    "CAS_number", "CAS_supplemental", "ATCvet", "ATC_prefix", "ATC_suffix",
    "ATC_supplemental", "PubChem", "PubChemSubstance", "IUPHAR_ligand",
    "DrugBank", "ChemSpiderID_Ref", "ChemSpiderID", "UNII_Ref", "UNII",
    "KEGG_Ref", "KEGG", "ChEBI_Ref", "ChEBI", "ChEMBL_Ref", "ChEMBL",
)

# Element counts share a single output line, written as "| X=value ".
_ELEMENT_FIELDS = (
    "C", "H", "Ag", "As", "Au", "B", "Bi", "Br", "Cl", "Co", "F", "Fe", "Gd",
    "I", "K", "Mn", "N", "Na", "O", "P", "Pt", "S", "Sb", "Se", "Sr", "Tc",
    "Zn",
)

_CHEMICAL_FIELDS = (
    "molecular_weight", "smiles", "InChI_Ref", "InChI", "StdInChI_Ref",
    "StdInChI", "StdInChI_comment", "StdInChIKey_Ref", "StdInChIKey",
    "synonyms", "density", "melting_point", "melting_high", "melting_notes",
    "boiling_point", "boiling_notes", "solubility", "specific_rotation",
    "sec_combustion",
)

# Start of a Drugbox transclusion; the end is found by counting braces.
_DRUGBOX_OPENING = re.compile(r"\{\{\s*[Dd]rugbox\s*")


def _locate_drugbox(text):
    """Find the first Drugbox template in text.

    Returns (start, end, params): text[start:end] is the template plus any
    whitespace that follows it, and params is the text between the template
    name and its closing braces.  Returns None when there is no Drugbox or
    its braces are never closed.  Braces are counted in pairs so that nested
    templates inside the infobox do not end the match early and text after
    the infobox is never included.
    """
    opening = _DRUGBOX_OPENING.search(text)
    if opening is None:
        return None
    limit = len(text)
    depth = 0
    pos = opening.start()
    while pos < limit:
        pair = text[pos:pos + 2]
        if pair == "{{":
            depth += 1
            pos += 2
        elif pair == "}}":
            depth -= 1
            pos += 2
            if depth == 0:
                params = text[opening.end():pos - 2]
                end = pos
                while end < limit and text[end].isspace():
                    end += 1
                return opening.start(), end, params
        else:
            pos += 1
    return None


def _parse_parameters(params):
    """Return {name: value} for every line of params matched by regexp_param.

    Lines that do not match (section comments, continuation lines of a
    multi-line value) are ignored.
    """
    found = {}
    for line in params.splitlines():
        hit = regexp_param.search(line)
        if hit:
            found[hit.group('PARAM')] = hit.group('VALUE')
    return found


def _link_is_live(link):
    """Return True if fetching link yields HTTP status 200.

    A failed connection counts as "not live" instead of aborting the run.
    """
    try:
        response = urllib.urlopen(link)
    except IOError:
        return False
    try:
        return response.getcode() == 200
    finally:
        response.close()


def _normalised_names(raw_names):
    """Return the set of stripped, capitalised, non-empty names."""
    names = set()
    for raw in raw_names:
        # NOTE(review): encode() uses the default codec, as the original code
        # did; presumably names are ASCII only -- confirm against the CSV.
        name = raw.encode().strip().capitalize()
        if name:
            names.add(name)
    return names


def _merge_tradenames(csv_names, infobox_names):
    """Return the sorted union of ';'-separated and ','-separated names.

    Empty entries are discarded by value; nothing is dropped by position.
    """
    merged = _normalised_names(csv_names.split(";"))
    merged |= _normalised_names(infobox_names.split(","))
    return sorted(merged)


def _drugs_com_template(names):
    """Return a {{drugs.com}} call for the first live Drugs.com page, or None.

    Each name is lower-cased; a name containing spaces is tried with "_" and
    with "-" in their place.  Roots are tried in the order given in roots.
    """
    stems = []
    for name in names:
        slug = name.lower()
        if " " in slug:
            stems.append(slug.replace(" ", "_"))
            stems.append(slug.replace(" ", "-"))
        else:
            stems.append(slug)
    for label, base_url in roots:
        for stem in stems:
            if _link_is_live(base_url + stem + ".html"):
                return "{{drugs.com|" + label + "|" + stem + "}}"
    return None


def _column(record, index):
    """Return record[index] stripped, or "" when the row is too short."""
    if index < len(record):
        return record[index].strip()
    return ""


def _add_drug_data(parameters, inn, record):
    """Update parameters in place from one drugbank_data row.

    Merges tradenames, sets MedlinePlus and Drugs.com when a working link is
    found, and fills KEGG, ChemSpiderID and PubChem when they are absent.
    """
    names = _merge_tradenames(_column(record, 0), parameters.get("tradename", ""))
    if names:
        parameters['tradename'] = ", ".join(names)

    # test link status to make sure it is good before assigning parameter
    accession = _column(record, 2)
    if accession and _link_is_live(
            "http://www.nlm.nih.gov/medlineplus/druginfo/meds/" + accession + ".html"):
        parameters['MedlinePlus'] = accession

    drugs_com = _drugs_com_template([inn] + names)
    if drugs_com:
        parameters['Drugs.com'] = drugs_com

    # while we are at it, populate identifier fields if missing
    for name, index in (("KEGG", 4), ("ChemSpiderID", 6), ("PubChem", 7)):
        value = _column(record, index)
        if value and name not in parameters:
            parameters[name] = value


def _build_drugbox(parameters):
    """Serialise parameters as a Drugbox template ending in a newline.

    Known fields are written in their fixed section order.  Parameters that
    belong to no section are appended at the end so that they are kept rather
    than silently removed from the article.
    """
    parts = ["{{Drugbox\n"]
    emitted = set()

    def add_fields(names):
        for name in names:
            if name in parameters:
                parts.append("| " + name + " = " + parameters[name] + "\n")
                emitted.add(name)

    add_fields(_LEADING_FIELDS)

    section = _TYPE_SECTIONS.get(parameters.get("type", "").strip())
    if section:
        heading, names = section
        parts.append(heading)
        add_fields(names)

    parts.append("\n<!--Clinical data-->\n")
    add_fields(_CLINICAL_FIELDS)

    parts.append("\n<!--Pharmacokinetic data-->\n")
    add_fields(_PHARMACOKINETIC_FIELDS)

    parts.append("\n<!--Identifiers-->\n")
    add_fields(_IDENTIFIER_FIELDS)

    parts.append("\n<!--Chemical data-->\n")
    add_fields(("chemical_formula",))
    for element in _ELEMENT_FIELDS:
        if element in parameters:
            parts.append("| " + element + "=" + parameters[element] + " ")
            emitted.add(element)
    if "charge" in parameters:
        parts.append("| charge = " + parameters['charge'] + " ")
        emitted.add("charge")
    parts.append("\n")
    add_fields(_CHEMICAL_FIELDS)

    leftovers = sorted(name for name in parameters if name not in emitted)
    if leftovers:
        parts.append("\n")
        add_fields(leftovers)

    parts.append("}}\n")
    return "".join(parts)


# main loop

try:
    site = wikipedia.getSite()

    for article in articles:

        article = article.rstrip('\n')
        # NOTE(review): the drug name is hard-coded for sandbox testing; every
        # article is filled with Lisinopril data until this becomes
        # "INN = article".
        INN = "Lisinopril"

        log_string = "* [[" + article + "]]"

        page = wikipedia.Page(site, article)
        text = page.get(get_redirect = True)

        if not Allowbots(text):
            # Skip only this page; the remaining articles are still processed.
            print(log_string + " , skipped: bot exclusion template present")
            continue

        print(log_string + " , article: " + article)

        located = _locate_drugbox(text)
        if located is None:
            print(", no drugbox found, page left unchanged")
            continue
        start, end, parameters = located
        current_parameters = _parse_parameters(parameters)

        if INN in drugbank_data:
            _add_drug_data(current_parameters, INN, drugbank_data[INN])

        # build new drugbox template and splice it over the original one;
        # slicing avoids re.sub interpreting backslashes in parameter values
        new_drugbox = _build_drugbox(current_parameters)
        new_text = text[:start] + new_drugbox + text[end:]

        if new_text == text:
            print(", no changes needed")
            continue

        page.put(new_text, comment='populated clinical fields in drugbox', watchArticle = None, minorEdit = True)
        print(", page updated")
finally:
    # Always release the pywikipedia throttle, even after an error.
    wikipedia.stopme()