Content deleted Content added
updated code |
more robust searches for drugs.com monograph external links; improved log file report; added a couple of missing drugbox parameters |
||
Line 192:
# Closing tag of a <ref>...</ref> footnote.
regexp_ref_tag_end = re.compile(r"</ref>")
# A {{cite ...}} / {{Cite ...}} template; TEMPLATE captures everything between
# "cite" and the first following "}}" (including any leading whitespace).
# NOTE: the class is [Cc], not [C|c] -- inside a character class "|" is a
# literal, so the old pattern also accepted "{{|ite ...}}".
regexp_citation_template = re.compile(r"\{\{[Cc]ite\s*?(?P<TEMPLATE>.*?)\}\}")
# Monograph link as it appears in fetched HTML, e.g.
# href='/monograph/maprotiline-hydrochloride.html'
# STEM captures the page name without the ".html" suffix. A raw string is used
# so that "\." reaches the regex engine as an escaped dot rather than relying
# on an invalid string escape being passed through.
regexp_monograph_url = re.compile(r"href='/monograph/(?P<STEM>.*?)\.html'", re.DOTALL)
def Allowbots(text):
Line 588 ⟶ 591:
"KEGG" in current_parameters or "ChEBI" in current_parameters or "ChEMBL" in current_parameters):
new_drugbox += "\n<!--Identifiers-->\n"
if current_parameters.has_key("
if current_parameters.has_key("CASNo_Ref"): new_drugbox += "| CASNo_Ref = " + current_parameters['CASNo_Ref'] + "\n"
if current_parameters.has_key("CAS_number"): new_drugbox += "| CAS_number = " + current_parameters['CAS_number'] + "\n"
if current_parameters.has_key("CAS_supplemental"): new_drugbox += "| CAS_supplemental = " + current_parameters['CAS_supplemental'] + "\n"
Line 598 ⟶ 602:
if current_parameters.has_key("PubChemSubstance"): new_drugbox += "| PubChemSubstance = " + current_parameters['PubChemSubstance'] + "\n"
if current_parameters.has_key("IUPHAR_ligand"): new_drugbox += "| IUPHAR_ligand = " + current_parameters['IUPHAR_ligand'] + "\n"
if current_parameters.has_key("DrugBank_Ref"): new_drugbox += "| DrugBank_Ref = " + current_parameters['DrugBank_Ref'] + "\n"
if current_parameters.has_key("DrugBank"): new_drugbox += "| DrugBank = " + current_parameters['DrugBank'] + "\n"
if current_parameters.has_key("ChemSpiderID_Ref"): new_drugbox += "| ChemSpiderID_Ref = " + current_parameters['ChemSpiderID_Ref'] + "\n"
Line 650 ⟶ 655:
if current_parameters.has_key("InChI_Ref"): new_drugbox += "| InChI_Ref = " + current_parameters['InChI_Ref'] + "\n"
if current_parameters.has_key("InChI"): new_drugbox += "| InChI = " + current_parameters['InChI'] + "\n"
if current_parameters.has_key("InChIKey"): new_drugbox += "| InChIKey = " + current_parameters['InChIKey'] + "\n"
if current_parameters.has_key("StdInChI_Ref"): new_drugbox += "| StdInChI_Ref = " + current_parameters['StdInChI_Ref'] + "\n"
if current_parameters.has_key("StdInChI"): new_drugbox += "| StdInChI = " + current_parameters['StdInChI'] + "\n"
Line 753 ⟶ 759:
else:
stems.append(drugname)
# also try common salts
stems.append(drugname + "-hydrochloride")
stems.append(drugname + "-sulfate")
stems.append(drugname + "-chloride")
stems.append(drugname + "-sodium")
stems.append(drugname + "-bromide")
stems.append(drugname + "-maleate")
stems.append(drugname + "-citrate")
if drugbank_drugs_com:
if (string.find(drugbank_drugs_com, "http://www.drugs.com/") > -1):
Line 768 ⟶ 782:
if urllib.urlopen(link).getcode() == 200: # test link status to make sure it is good before assigning parameter
# print "passed link: ", link
raise StopIteration()
else:
opener = urllib.FancyURLopener({})
f = opener.open(link)
text = f.read()
result = regexp_monograph_url.search(text)
if result:
stem = result.group('STEM')
link = "{{drugs.com|" + roots[0][0] + "|" + stem + "}}"
raise StopIteration()
else:
link = ""
Line 799 ⟶ 823:
return False
def savepage(page, text, summary = '', minor = False, log_string = ""):
"""Save text to a page and log exceptions."""
if summary != '':
Line 805 ⟶ 829:
try:
page.put(text, minorEdit = minor)
wikipedia.output('%s \03{green}saving %s' % (log_string, page.title()) )
return ''
except wikipedia.LockedPage:
wikipedia.output('%s \03{red}cannot save %s because it is locked\03{default}' % (log_string, page.title()) )
return '# %s: page was locked\n' % page.aslink()
except wikipedia.EditConflict:
wikipedia.output('%s \03{red}cannot save %s because of edit conflict\03{default}' % (log_string, page.title()) )
return '# %s: edit conflict occurred\n' % page.aslink()
except wikipedia.SpamfilterError, error:
wikipedia.output('%s \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % ((log_string, page.title(), error.url)) )
return '# %s: spam blacklist entry\n' % page.aslink()
except:
wikipedia.output('%s \03{red}unknown error on saving %s\03{default}' % (log_string, page.title()) )
return '# %s: unknown error occurred\n' % page.aslink()
Line 846 ⟶ 870:
new_drugbox = ""
log_string = "* [[" + article + "]], "
site = wikipedia.getSite()
Line 859 ⟶ 882:
if begin:
parameters = text[begin:end]
else:
log_string =
print log_string
continue
Line 871 ⟶ 892:
if unbalanced(parameters):
log_string =
print log_string
continue
Line 896 ⟶ 917:
db_data = drugbank_data[INN]
elif "DrugBank" in current_parameters and current_parameters['DrugBank'] in DrugBank_ID_INN:
INN = DrugBank_ID_INN[current_parameters['DrugBank']]
db_data = drugbank_data[INN]
if not "drug_name" in current_parameters:
Line 907 ⟶ 928:
if "DrugBank" in current_parameters and current_parameters['DrugBank'] in DrugBank_ID_INN:
if DrugBank_ID_INN[current_parameters['DrugBank']] == INN:
else:
else:
if db_data:
if db_data[8]:
current_parameters['DrugBank'] = db_data[8]
Line 970 ⟶ 992:
new_text = text[:begin] + build_new_drugbox(current_parameters) + text[end:]
# print build_new_drugbox(current_parameters)
# print current_parameters
# print new_text
Line 975 ⟶ 999:
if current_parameters:
comment='populated new fields in drugbox and reordered per [[Wikipedia:Bots/Requests_for_approval/BogBot_2|bot approval]]'
status = savepage(page, new_text, comment,
else:
print ", page not updated"
Line 983 ⟶ 1,006:
run()
</source>
|