User:BogBot/Source code/Task 03: Difference between revisions

Content deleted Content added
substantial rewrite using function calls
more tweaks
Line 155:
 
# Pre-compiled patterns that locate drugbox parameters whose value is a
# wiki template.  Each pattern captures the template text in the named
# group TEMPLATE.  Optional whitespace after the opening "{{" is tolerated
# via "\s*" (the previous "s*" only matched literal "s" characters, so
# values such as "{{ cascite|...}}" were never recognised).

# ATC_supplemental = {{ATC|B01|AC06}}, {{ATC|N02|BA01}}
# The leading ".*" lets TEMPLATE span several comma-separated ATC templates.
regexp_ATC_supplemental = re.compile(r"\|\s*?ATC_supplemental\s*?=\s*?(?P<TEMPLATE>.*\{\{\s*(ATC).+?\}\})\s*?($|\|)")
# CASNo_Ref = {{cascite|correct|CAS}}
regexp_CASNo_Ref = re.compile(r"\|\s??CASNo_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Cascite|cascite).+?\}\})")
# ChEMBL_Ref = {{ebicite|correct|EBI}}
regexp_ChEMBL_Ref = re.compile(r"\|\s??ChEMBL_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Ebicite|ebicite).+?\}\})")
# ChemSpiderID_Ref = {{chemspidercite|correct|chemspider}}
regexp_ChemSpiderID_Ref = re.compile(r"\|\s??ChemSpiderID_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Chemspidercite|chemspidercite).+?\}\})")
# Drugs.com = {{drugs.com|monograph|lisinopril}}
regexp_Drugs_com = re.compile(r"\|\s??Drugs\.com\s?=\s?(?P<TEMPLATE>\{\{\s*(Drugs\.com|drugs\.com).+?\}\})")
# KEGG_Ref = {{keggcite|correct|kegg}}
regexp_KEGG_Ref = re.compile(r"\|\s??KEGG_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Keggcite|keggcite).+?\}\})")
# StdInChI_Ref = {{stdinchicite|correct|chemspider}}
regexp_StdInChI_Ref = re.compile(r"\|\s??StdInChI_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Stdinchicite|stdinchicite).+?\}\})")
# StdInChIKey_Ref = {{stdinchicite|correct|chemspider}}
regexp_StdInChIKey_Ref = re.compile(r"\|\s??StdInChIKey_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Stdinchicite|stdinchicite).+?\}\})")
# UNII_Ref = {{fdacite|changed|FDA}}
regexp_UNII_Ref = re.compile(r"\|\s??UNII_Ref\s?=\s?(?P<TEMPLATE>\{\{\s*(Fdacite|fdacite).+?\}\})")
 
def Allowbots(text):
Line 184:
result_text = search_result.group(0) # returns the entire matching sequence
begin, end = search_result.span()
print
else:
return None
Line 210 ⟶ 209:
# Name Trade_Names Drug_Type MedlinePlus Drugs.com_link KEGG_Drug_ID KEGG_Compound_ID ChemSpider_ID PubChem_Compound_ID
 
drug_data = csv.reader(open('/Users/BogHogbogbot/progs/pywikipedia/drugbox/drug_links_agumented.csv', 'rU'), dialect='excel')
 
for row in drug_data:
Line 298 ⟶ 297:
# do the same thing for nested templates
# forever = True
# while forever:
# print "new_list: ", new_list
# forever = False
# for sub_string in sub_strings[1:]:
# print "sub_string: ", sub_string
# if ('}}' in sub_string) and ((not '{{' in sub_string) or sub_string.find('}}') < sub_string.find('{{')):
# new_list[-1] = new_list[-1] + '|' + sub_string
# forever = True
# else:
# new_list.append(sub_string)
# sub_strings = new_list
# print "new_list: ", new_list
Line 337 ⟶ 336:
def build_new_drugbox(current_parameters):
# build new drugbox template
 
# if type parameter is missing, check subordinate parameters that infer type, and if found, assign type
 
if not current_parameters.has_key("type"):
if ("component1" in current_parameters or
"class1" in current_parameters or
"component2" in current_parameters or
"class2" in current_parameters or
"component3" in current_parameters or
"class3" in current_parameters or
"component4" in current_parameters
or "class4" in current_parameters):
current_parameters['type'] = "combo"
elif ("mab_type") in current_parameters:
current_parameters['type'] = "mab"
elif ("vaccine_type") in current_parameters:
current_parameters['type'] = "vaccine"
 
new_drugbox = ""
Line 380 ⟶ 396:
if current_parameters.has_key("vaccine_type"): new_drugbox += "| vaccine_type = " + current_parameters['vaccine_type'] + "\n"
 
if ("tradename" in current_parameters or "Drugs.com" in current_parameters or "MedlinePlus" in current_parameters or "licence_EU" in current_parameters or
new_drugbox += "\n<!--Clinical data-->\n"
"licence_US" in current_parameters or "DailyMedID" in current_parameters or "pregnancy_AU" in current_parameters or "pregnancy_US" in current_parameters or
if current_parameters.has_key("tradename"): new_drugbox += "| tradename = " + current_parameters['tradename'] + "\n"
"pregnancy_category" in current_parameters or "legal_AU" in current_parameters or "legal_CA" in current_parameters or "legal_UK" in current_parameters or
if current_parameters.has_key("Drugs.com"): new_drugbox += "| Drugs.com = " + current_parameters['Drugs.com'] + "\n"
"legal_US" in current_parameters or "legal_status" in current_parameters or "dependency_liability" or "routes_of_administration" in current_parameters):
if current_parameters.has_key("MedlinePlus"): new_drugbox += "| MedlinePlus = " + current_parameters['MedlinePlus'] + "\n"
if current_parameters.has_key("licence_EU"): new_drugbox += "| licence_EU = " + current_parameters['licence_EU'] + "\n"
if current_parameters.has_key("licence_US"): new_drugbox += "| licence_US = " + current_parameters['licence_US'] + "\n"
if current_parameters.has_key("DailyMedID"): new_drugbox += "| DailyMedID = " + current_parameters['DailyMedID'] + "\n"
if current_parameters.has_key("pregnancy_AU"): new_drugbox += "| pregnancy_AU = " + current_parameters['pregnancy_AU'] + "\n"
if current_parameters.has_key("pregnancy_US"): new_drugbox += "| pregnancy_US = " + current_parameters['pregnancy_US'] + "\n"
if current_parameters.has_key("pregnancy_category"): new_drugbox += "| pregnancy_category = " + current_parameters['pregnancy_category'] + "\n"
if current_parameters.has_key("legal_AU"): new_drugbox += "| legal_AU = " + current_parameters['legal_AU'] + "\n"
if current_parameters.has_key("legal_CA"): new_drugbox += "| legal_CA = " + current_parameters['legal_CA'] + "\n"
if current_parameters.has_key("legal_UK"): new_drugbox += "| legal_UK = " + current_parameters['legal_UK'] + "\n"
if current_parameters.has_key("legal_US"): new_drugbox += "| legal_US = " + current_parameters['legal_US'] + "\n"
if current_parameters.has_key("legal_status"): new_drugbox += "| legal_status = " + current_parameters['legal_status'] + "\n"
if current_parameters.has_key("dependency_liability"): new_drugbox += "| dependency_liability = " + current_parameters['dependency_liability'] + "\n"
if current_parameters.has_key("routes_of_administration"): new_drugbox += "| routes_of_administration = " + current_parameters['routes_of_administration'] + "\n"
 
new_drugbox += "\n<!--PharmacokineticClinical data-->\n"
if current_parameters.has_key("bioavailabilitytradename"): new_drugbox += "| bioavailabilitytradename = " + current_parameters['bioavailabilitytradename'] + "\n"
if current_parameters.has_key("protein_boundDrugs.com"): new_drugbox += "| protein_boundDrugs.com = " + current_parameters['protein_boundDrugs.com'] + "\n"
if current_parameters.has_key("metabolismMedlinePlus"): new_drugbox += "| metabolismMedlinePlus = " + current_parameters['metabolismMedlinePlus'] + "\n"
if current_parameters.has_key("elimination_half-lifelicence_EU"): new_drugbox += "| elimination_half-lifelicence_EU = " + current_parameters['elimination_half-lifelicence_EU'] + "\n"
if current_parameters.has_key("excretionlicence_US"): new_drugbox += "| excretionlicence_US = " + current_parameters['excretionlicence_US'] + "\n"
if current_parameters.has_key("DailyMedID"): new_drugbox += "| DailyMedID = " + current_parameters['DailyMedID'] + "\n"
if current_parameters.has_key("pregnancy_AU"): new_drugbox += "| pregnancy_AU = " + current_parameters['pregnancy_AU'] + "\n"
if current_parameters.has_key("pregnancy_US"): new_drugbox += "| pregnancy_US = " + current_parameters['pregnancy_US'] + "\n"
if current_parameters.has_key("pregnancy_category"): new_drugbox += "| pregnancy_category = " + current_parameters['pregnancy_category'] + "\n"
if current_parameters.has_key("legal_AU"): new_drugbox += "| legal_AU = " + current_parameters['legal_AU'] + "\n"
if current_parameters.has_key("legal_CA"): new_drugbox += "| legal_CA = " + current_parameters['legal_CA'] + "\n"
if current_parameters.has_key("legal_UK"): new_drugbox += "| legal_UK = " + current_parameters['legal_UK'] + "\n"
if current_parameters.has_key("legal_US"): new_drugbox += "| legal_US = " + current_parameters['legal_US'] + "\n"
if current_parameters.has_key("legal_status"): new_drugbox += "| legal_status = " + current_parameters['legal_status'] + "\n"
if current_parameters.has_key("dependency_liability"): new_drugbox += "| dependency_liability = " + current_parameters['dependency_liability'] + "\n"
if current_parameters.has_key("routes_of_administration"): new_drugbox += "| routes_of_administration = " + current_parameters['routes_of_administration'] + "\n"
 
if ("bioavailability" in current_parameters or "protein_bound metabolism" in current_parameters or "elimination_half-life" in current_parameters or "excretion" in current_parameters):
 
new_drugbox += "\n<!--Pharmacokinetic data-->\n"
if current_parameters.has_key("bioavailability"): new_drugbox += "| bioavailability = " + current_parameters['bioavailability'] + "\n"
if current_parameters.has_key("protein_bound"): new_drugbox += "| protein_bound = " + current_parameters['protein_bound'] + "\n"
if current_parameters.has_key("metabolism"): new_drugbox += "| metabolism = " + current_parameters['metabolism'] + "\n"
if current_parameters.has_key("elimination_half-life"): new_drugbox += "| elimination_half-life = " + current_parameters['elimination_half-life'] + "\n"
if current_parameters.has_key("excretion"): new_drugbox += "| excretion = " + current_parameters['excretion'] + "\n"
if ("CAS_number" in current_parameters or "CAS_supplemental" in current_parameters or "ATCvet" in current_parameters or "ATC_prefix" in current_parameters or
new_drugbox += "\n<!--Identifiers-->\n"
"ATC_suffix" in current_parameters or "ATC_supplemental" in current_parameters or "PubChem" in current_parameters or "PubChemSubstance" in current_parameters or
if current_parameters.has_key("CAS_number"): new_drugbox += "| CAS_number = " + current_parameters['CAS_number'] + "\n"
"IUPHAR_ligand" in current_parameters or "DrugBank" in current_parameters or "ChemSpiderID" in current_parameters or "UNII" in current_parameters or
if current_parameters.has_key("CAS_supplemental"): new_drugbox += "| CAS_supplemental = " + current_parameters['CAS_supplemental'] + "\n"
"KEGG" in current_parameters or "ChEBI" in current_parameters or "ChEMBL" in current_parameters):
if current_parameters.has_key("ATCvet"): new_drugbox += "| ATCvet = " + current_parameters['ATCvet'] + "\n"
new_drugbox += "\n<!--Identifiers-->\n"
if current_parameters.has_key("ATC_prefixCAS_number"): new_drugbox += "| ATC_prefixCAS_number = " + current_parameters['ATC_prefixCAS_number'] + "\n"
if current_parameters.has_key("ATC_suffixCAS_supplemental"): new_drugbox += "| ATC_suffixCAS_supplemental = " + current_parameters['ATC_suffixCAS_supplemental'] + "\n"
if current_parameters.has_key("ATC_supplementalATCvet"): new_drugbox += "| ATC_supplementalATCvet = " + current_parameters['ATC_supplementalATCvet'] + "\n"
if current_parameters.has_key("PubChemATC_prefix"): new_drugbox += "| PubChemATC_prefix = " + current_parameters['PubChemATC_prefix'] + "\n"
if current_parameters.has_key("PubChemSubstanceATC_suffix"): new_drugbox += "| PubChemSubstanceATC_suffix = " + current_parameters['PubChemSubstanceATC_suffix'] + "\n"
if current_parameters.has_key("IUPHAR_ligandATC_supplemental"): new_drugbox += "| IUPHAR_ligandATC_supplemental = " + current_parameters['IUPHAR_ligandATC_supplemental'] + "\n"
if current_parameters.has_key("DrugBankPubChem"): new_drugbox += "| DrugBankPubChem = " + current_parameters['DrugBankPubChem'] + "\n"
if current_parameters.has_key("ChemSpiderID_RefPubChemSubstance"): new_drugbox += "| ChemSpiderID_RefPubChemSubstance = " + current_parameters['ChemSpiderID_RefPubChemSubstance'] + "\n"
if current_parameters.has_key("ChemSpiderIDIUPHAR_ligand"): new_drugbox += "| ChemSpiderIDIUPHAR_ligand = " + current_parameters['ChemSpiderIDIUPHAR_ligand'] + "\n"
if current_parameters.has_key("UNII_RefDrugBank"): new_drugbox += "| UNII_RefDrugBank = " + current_parameters['UNII_RefDrugBank'] + "\n"
if current_parameters.has_key("UNIIChemSpiderID_Ref"): new_drugbox += "| UNIIChemSpiderID_Ref = " + current_parameters['UNIIChemSpiderID_Ref'] + "\n"
if current_parameters.has_key("KEGG_RefChemSpiderID"): new_drugbox += "| KEGG_RefChemSpiderID = " + current_parameters['KEGG_RefChemSpiderID'] + "\n"
if current_parameters.has_key("KEGGUNII_Ref"): new_drugbox += "| KEGGUNII_Ref = " + current_parameters['KEGGUNII_Ref'] + "\n"
if current_parameters.has_key("ChEBI_RefUNII"): new_drugbox += "| ChEBI_RefUNII = " + current_parameters['ChEBI_RefUNII'] + "\n"
if current_parameters.has_key("ChEBIKEGG_Ref"): new_drugbox += "| ChEBIKEGG_Ref = " + current_parameters['ChEBIKEGG_Ref'] + "\n"
if current_parameters.has_key("ChEMBL_RefKEGG"): new_drugbox += "| ChEMBL_RefKEGG = " + current_parameters['ChEMBL_RefKEGG'] + "\n"
if current_parameters.has_key("ChEMBLChEBI_Ref"): new_drugbox += "| ChEMBLChEBI_Ref = " + current_parameters['ChEMBLChEBI_Ref'] + "\n"
if current_parameters.has_key("ChEBI"): new_drugbox += "| ChEBI = " + current_parameters['ChEBI'] + "\n"
if current_parameters.has_key("ChEMBL_Ref"): new_drugbox += "| ChEMBL_Ref = " + current_parameters['ChEMBL_Ref'] + "\n"
if current_parameters.has_key("ChEMBL"): new_drugbox += "| ChEMBL = " + current_parameters['ChEMBL'] + "\n"
 
new_drugbox += "\n<!--Chemical data-->\n"
if current_parameters.has_key("chemical_formula"): in current_parameters or "C" in current_parameters new_drugbox +=or "| chemical_formula = H" in current_parameters or "Ag" in current_parameters or "As" +in current_parameters['chemical_formula'] or + "\n"
"Au" in current_parameters or "B" in current_parameters or "Bi" in current_parameters or "Br" in current_parameters or "Cl" in current_parameters or "Co" in current_parameters or
if current_parameters.has_key("C"): new_drugbox += "| C=" + current_parameters['C'] + " "
"F" in current_parameters or "Fe" in current_parameters or "Gd" in current_parameters or "I" in current_parameters or "K" in current_parameters or "Mn" in current_parameters or
if current_parameters.has_key("H"): new_drugbox += "| H=" + current_parameters['H'] + " "
"N" in current_parameters or "Na" in current_parameters or "O" in current_parameters or "P" in current_parameters or "Pt" in current_parameters or "S" in current_parameters or
if current_parameters.has_key("Ag"): new_drugbox += "| Ag=" + current_parameters['Ag'] + " "
"Sb" in current_parameters or "Se" in current_parameters or "Sr" in current_parameters or "Tc" in current_parameters or "charge" in current_parameters):
if current_parameters.has_key("As"): new_drugbox += "| As=" + current_parameters['As'] + " "
if current_parameters.has_key("Auchemical_formula"): new_drugbox += "| Auchemical_formula = " + current_parameters['Auchemical_formula'] + " \n"
if current_parameters.has_key("BC"): new_drugbox += "| BC=" + current_parameters['BC'] + " "
if current_parameters.has_key("BiH"): new_drugbox += "| BiH=" + current_parameters['BiH'] + " "
if current_parameters.has_key("BrAg"): new_drugbox += "| BrAg=" + current_parameters['BrAg'] + " "
if current_parameters.has_key("ClAs"): new_drugbox += "| ClAs=" + current_parameters['ClAs'] + " "
if current_parameters.has_key("CoAu"): new_drugbox += "| CoAu=" + current_parameters['CoAu'] + " "
if current_parameters.has_key("FB"): new_drugbox += "| FB=" + current_parameters['FB'] + " "
if current_parameters.has_key("FeBi"): new_drugbox += "| FeBi=" + current_parameters['FeBi'] + " "
if current_parameters.has_key("GdBr"): new_drugbox += "| GdBr=" + current_parameters['GdBr'] + " "
if current_parameters.has_key("ICl"): new_drugbox += "| ICl=" + current_parameters['ICl'] + " "
if current_parameters.has_key("KCo"): new_drugbox += "| KCo=" + current_parameters['KCo'] + " "
if current_parameters.has_key("MnF"): new_drugbox += "| MnF=" + current_parameters['MnF'] + " "
if current_parameters.has_key("NFe"): new_drugbox += "| NFe=" + current_parameters['NFe'] + " "
if current_parameters.has_key("NaGd"): new_drugbox += "| NaGd=" + current_parameters['NaGd'] + " "
if current_parameters.has_key("OI"): new_drugbox += "| OI=" + current_parameters['OI'] + " "
if current_parameters.has_key("PK"): new_drugbox += "| PK=" + current_parameters['PK'] + " "
if current_parameters.has_key("PtMn"): new_drugbox += "| PtMn=" + current_parameters['PtMn'] + " "
if current_parameters.has_key("SN"): new_drugbox += "| SN=" + current_parameters['SN'] + " "
if current_parameters.has_key("SbNa"): new_drugbox += "| CNa=" + current_parameters['SbNa'] + " "
if current_parameters.has_key("SeO"): new_drugbox += "| SeO=" + current_parameters['SeO'] + " "
if current_parameters.has_key("SrP"): new_drugbox += "| SrP=" + current_parameters['SrP'] + " "
if current_parameters.has_key("TcPt"): new_drugbox += "| TcPt=" + current_parameters['TcPt'] + " "
if current_parameters.has_key("chargeS"): new_drugbox += "| charge S= " + current_parameters['chargeS'] + " "
if current_parameters.has_key("Sb"): new_drugbox += "| C=" + current_parameters['Sb'] + " "
new_drugbox += "\n"
if current_parameters.has_key("Se"): new_drugbox += "| Se=" + current_parameters['Se'] + " "
if current_parameters.has_key("Sr"): new_drugbox += "| Sr=" + current_parameters['Sr'] + " "
if current_parameters.has_key("Tc"): new_drugbox += "| Tc=" + current_parameters['Tc'] + " "
if current_parameters.has_key("charge"): new_drugbox += "| charge = " + current_parameters['charge'] + " "
new_drugbox += "\n"
if current_parameters.has_key("molecular_weight"): new_drugbox += "| molecular_weight = " + current_parameters['molecular_weight'] + "\n"
if current_parameters.has_key("smiles"): new_drugbox += "| smiles = " + current_parameters['smiles'] + "\n"
Line 570 ⟶ 603:
for root in roots:
for stem in stems:
if stem:
link = root[1] + stem + ".html"
# print "attempted Drugs.com link: ", link
if urllib.urlopen(link).getcode() == 200: # test link status to make sure it is good before assigning parameter
# link = "{{drugs.com|" + root[0] +print "|"passed +link: stem", + "}}"link
raise StopIteration() link = "{{drugs.com|" + root[0] + "|" + stem + "}}"
else:
link = ""
raise StopIteration()
except StopIteration:
pass
Line 586 ⟶ 623:
# list of articles to work on is generated by: "python pagegenerators.py -namespace:0 -transcludes:Drugbox > drugbox_titles.txt"
# articles = []
# articles = codecs.open('/Users/BogHogbogbot/progs/pywikipedia/drugbox/drugbox_titles.txt', mode = 'r', encoding='utf-8')
 
articles = ["Template:Drugbox/LisinoprilAspirin"]
 
for article in articles:
Line 611 ⟶ 648:
parameters = text[begin:end]
log_string = ", article: " + article
print log_string,
else:
log_string = ", article: " + article + "drugbox not found!"
print log_string,
break
 
Line 630 ⟶ 667:
 
INN = article
# INN = "Acetylsalicylic acid"
if INN in drugbank_data:
db_data = drugbank_data[INN]
Line 653 ⟶ 691:
if db_data[0]:
merck_tradename = db_data[0]
else:
merck_tradename = ""
else:
merck_tradename = ""
Line 670 ⟶ 710:
# add Drugs.com link
resultif = test_Drugs_comcurrent_parameters.has_key(INN, current_parameters['tradename'], db_data[3]):
tradename = current_parameters['tradename']
else:
tradename = ""
if db_data:
if db_data[3]:
drugbank_drugs_com = db_data[3]
else:
drugbank_drugs_com = ""
else:
drugbank_drugs_com = ""
result = test_Drugs_com(INN, tradename, drugbank_drugs_com)
if result: current_parameters['Drugs.com'] = result
 
new_text = text[:begin-1] + build_new_drugbox(current_parameters) + text[end:]
# print new_textcurrent_parameters
# print new_text
if current_parameters:
# page.put(new_text, comment='populated clinical fields in drugbox per [[Wikipedia:Bots/Requests_for_approval/BogBot_2|bot approval]]', watchArticle = None, minorEdit = True)
print ", page updated"
else: