User:BogBot/Source code/Task 03: Difference between revisions

Content deleted Content added
updated code
more robust searches for drugs.com monograph external links; improved log file report; added a couple of missing drugbox parameters
Line 192:
regexp_ref_tag_end = re.compile(r"</ref>")
regexp_citation_template = re.compile(r"\{\{[C|c]ite\s*?(?P<TEMPLATE>.*?)\}\}")
 
# href='/monograph/maprotiline-hydrochloride.html'
regexp_monograph_url = re.compile("href='/monograph/(?P<STEM>.*?)\.html'", re.DOTALL)
 
def Allowbots(text):
Line 588 ⟶ 591:
"KEGG" in current_parameters or "ChEBI" in current_parameters or "ChEMBL" in current_parameters):
new_drugbox += "\n<!--Identifiers-->\n"
if current_parameters.has_key("CAS_number_Ref"): new_drugbox += "| CAS_number_Ref = " + current_parameters['CAS_number_Ref'] + "\n"
if current_parameters.has_key("CASNo_Ref"): new_drugbox += "| CASNo_Ref = " + current_parameters['CASNo_Ref'] + "\n"
if current_parameters.has_key("CAS_number"): new_drugbox += "| CAS_number = " + current_parameters['CAS_number'] + "\n"
if current_parameters.has_key("CAS_supplemental"): new_drugbox += "| CAS_supplemental = " + current_parameters['CAS_supplemental'] + "\n"
Line 598 ⟶ 602:
if current_parameters.has_key("PubChemSubstance"): new_drugbox += "| PubChemSubstance = " + current_parameters['PubChemSubstance'] + "\n"
if current_parameters.has_key("IUPHAR_ligand"): new_drugbox += "| IUPHAR_ligand = " + current_parameters['IUPHAR_ligand'] + "\n"
if current_parameters.has_key("DrugBank_Ref"): new_drugbox += "| DrugBank_Ref = " + current_parameters['DrugBank_Ref'] + "\n"
if current_parameters.has_key("DrugBank"): new_drugbox += "| DrugBank = " + current_parameters['DrugBank'] + "\n"
if current_parameters.has_key("ChemSpiderID_Ref"): new_drugbox += "| ChemSpiderID_Ref = " + current_parameters['ChemSpiderID_Ref'] + "\n"
Line 650 ⟶ 655:
if current_parameters.has_key("InChI_Ref"): new_drugbox += "| InChI_Ref = " + current_parameters['InChI_Ref'] + "\n"
if current_parameters.has_key("InChI"): new_drugbox += "| InChI = " + current_parameters['InChI'] + "\n"
if current_parameters.has_key("InChIKey"): new_drugbox += "| InChIKey = " + current_parameters['InChIKey'] + "\n"
if current_parameters.has_key("StdInChI_Ref"): new_drugbox += "| StdInChI_Ref = " + current_parameters['StdInChI_Ref'] + "\n"
if current_parameters.has_key("StdInChI"): new_drugbox += "| StdInChI = " + current_parameters['StdInChI'] + "\n"
Line 753 ⟶ 759:
else:
stems.append(drugname)
# also try common salts
stems.append(drugname + "-hydrochloride")
stems.append(drugname + "-sulfate")
stems.append(drugname + "-chloride")
stems.append(drugname + "-sodium")
stems.append(drugname + "-bromide")
stems.append(drugname + "-maleate")
stems.append(drugname + "-citrate")
 
if drugbank_drugs_com:
if (string.find(drugbank_drugs_com, "http://www.drugs.com/") > -1):
Line 768 ⟶ 782:
if urllib.urlopen(link).getcode() == 200: # test link status to make sure it is good before assigning parameter
# print "passed link: ", link
if root[0] == "monograph":
link = "{{drugs.com|" + root[0] + "|" + stem + "}}"
raise StopIteration()
else:
opener = urllib.FancyURLopener({})
f = opener.open(link)
text = f.read()
result = regexp_monograph_url.search(text)
if result:
stem = result.group('STEM')
link = "{{drugs.com|" + roots[0][0] + "|" + stem + "}}"
raise StopIteration()
else:
link = ""
Line 799 ⟶ 823:
return False
 
def savepage(page, text, summary = '', minor = False, log_string = ""):
"""Save text to a page and log exceptions."""
if summary != '':
Line 805 ⟶ 829:
try:
page.put(text, minorEdit = minor)
wikipedia.output('%s \03{green}saving %s' % (log_string, page.title()) )
return ''
except wikipedia.LockedPage:
wikipedia.output('%s \03{red}cannot save %s because it is locked\03{default}' % (log_string, page.title()) )
return '# %s: page was locked\n' % page.aslink()
except wikipedia.EditConflict:
wikipedia.output('%s \03{red}cannot save %s because of edit conflict\03{default}' % (log_string, page.title()) )
return '# %s: edit conflict occurred\n' % page.aslink()
except wikipedia.SpamfilterError, error:
wikipedia.output('%s \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % ((log_string, page.title(), error.url)) )
return '# %s: spam blacklist entry\n' % page.aslink()
except:
wikipedia.output('%s \03{red}unknown error on saving %s\03{default}' % (log_string, page.title()) )
return '# %s: unknown error occurred\n' % page.aslink()
 
Line 846 ⟶ 870:
new_drugbox = ""
 
log_string = "* [[" + article + "]], "
print log_string,
 
site = wikipedia.getSite()
Line 859 ⟶ 882:
if begin:
parameters = text[begin:end]
log_string = ", article: " + article
print log_string,
else:
log_string = log_string + "drugbox not found!"
print log_string
continue
Line 871 ⟶ 892:
 
if unbalanced(parameters):
log_string = log_string + ", unmatched brackets found, article skipped!"
print log_string
continue
Line 896 ⟶ 917:
db_data = drugbank_data[INN]
elif "DrugBank" in current_parameters and current_parameters['DrugBank'] in DrugBank_ID_INN:
log_string = log_string + "INN reset from " + INN
INN = DrugBank_ID_INN[current_parameters['DrugBank']]
log_string = log_string + "to " + INN + ", "
db_data = drugbank_data[INN]
if not "drug_name" in current_parameters:
Line 907 ⟶ 928:
if "DrugBank" in current_parameters and current_parameters['DrugBank'] in DrugBank_ID_INN:
if DrugBank_ID_INN[current_parameters['DrugBank']] == INN:
log_string = log_string + "DrugBankID/INN OK!, "
else:
log_string = log_string + "DrugBankID/INN NOT OK!, "
else:
if db_data:
if db_data[8]:
if not "DrugBank" in current_parameters:
current_parameters['DrugBank'] = db_data[8]
 
Line 970 ⟶ 992:
new_text = text[:begin] + build_new_drugbox(current_parameters) + text[end:]
# print build_new_drugbox(current_parameters)
 
# print current_parameters
# print new_text
Line 975 ⟶ 999:
if current_parameters:
comment='populated new fields in drugbox and reordered per [[Wikipedia:Bots/Requests_for_approval/BogBot_2|bot approval]]'
status = savepage(page, new_text, comment, False, log_string)
print ", ", status
else:
print ", page not updated"
Line 983 ⟶ 1,006:
run()
 
</source>