Content deleted Content added
removed duplicate code |
updated to include code to generate the {{drugs.com}} template |
||
Line 131:
import csv
import string
# Included for bot exclusion compliance (see http://en.wikipedia.org/wiki/Template:Bots)▼
user = "BogBot"
Line 144 ⟶ 139:
# Matches a {{Drugbox ...}} / {{drugbox ...}} template call and captures the
# text that follows the template name in the named group PARAMS.
# NOTE(review): ".+" is greedy and re.DOTALL lets it cross newlines, so PARAMS
# extends to the LAST "}}" in the searched text, not necessarily to the
# drugbox's own closing braces -- confirm this is intended.
regexp_drug_infobox = re.compile(r"\{\{\s*(Drugbox|drugbox)\s*(?P<PARAMS>.+)\s*\}\}\s*", re.DOTALL)
# Matches a single "| name = value" parameter line: PARAM is the
# whitespace-free parameter name, VALUE is the remainder of the line.
# VALUE needs at least one character, so a parameter with nothing after
# the "=" does not match.
regexp_param = re.compile(r"^\s?\|\s?(?P<PARAM>\S+)\s?=\s?(?P<VALUE>.+)$")
▲# Included for bot exclusion compliance (see http://en.wikipedia.org/wiki/Template:Bots)
def Allowbots(text):
if (regexp_ab.search(text)):
Line 159 ⟶ 155:
# Row iterator over the drug links CSV file (Excel dialect, opened in
# universal-newline mode).
# NOTE(review): the file object passed to csv.reader() is never explicitly closed.
drug_data = csv.reader(open('/Users/BogBot/progs/pywikipedia/drugbox/drug_links_agumented.csv', 'rU'), dialect='excel')
# drugs.com root links:
# Each entry is a (label, base URL) pair.  The link-probing code appends
# "<stem>.html" to the base URL and writes the label into the {{drugs.com}}
# template; entries are tried in the order listed here.
roots = [("monograph","http://www.drugs.com/monograph/"), ("CDI","http://www.drugs.com/cdi/"), ("CONS","http://www.drugs.com/cons/"), ("MTM","http://www.drugs.com/mtm/"), ("parent","http://www.drugs.com/")]
for row in drug_data:
Line 195:
result_drug_infobox = regexp_drug_infobox.search(text)
if result_drug_infobox:
parameters = result_drug_infobox.group('PARAMS')
current_parameters = {}
Line 208:
current_parameters[parameter] = value
if INN in drugbank_data:
data = drugbank_data[INN]
Line 228:
# print "merck tradenames: ", merck_tradenames
# print "current tradenames: ", current_tradenames
# print "new tradenames: ", current_parameters['tradename']
# test web page, returns "200" if OK:
Line 246:
# | Drugs.com = <!-- link to Drugs.com monograph, e.g., "lisinopril" that links to "http://www.drugs.com/monograph/lisinopril.html" -->
# Candidate drug names for the Drugs.com lookup: the INN first, followed by
# every trade name.
drugnames = [INN]
drugnames.extend(new_tradenames)

# Drugs.com page stems are lower-case with spaces replaced by hyphens.
# Each stem is derived from its own name (not from whichever trade name a
# previous loop happened to leave behind), so this also works when there
# are no trade names at all.
stems = [drugname.lower().replace(" ", "-") for drugname in drugnames]

# Probe every root (in the priority order given by ``roots``) with every
# stem and keep the first link that answers with HTTP status 200.  The
# 'Drugs.com' parameter is left untouched when no link is good.
drugs_com_value = None
for root_label, root_url in roots:
    for stem in stems:
        link = root_url + stem + ".html"
        response = urllib.urlopen(link)
        try:
            # test link status to make sure it is good before assigning parameter
            status = response.getcode()
        finally:
            # Release the connection even if reading the status fails.
            response.close()
        if status == 200:
            drugs_com_value = "{{drugs.com|" + root_label + "|" + stem + "}}"
            break
    if drugs_com_value is not None:
        break
if drugs_com_value is not None:
    current_parameters['Drugs.com'] = drugs_com_value
# for parameter, value in current_parameters.iteritems():
Line 407 ⟶ 427:
# Replace the original drugbox with the new drugbox.
# The replacement is supplied through a function so that new_drugbox is
# inserted literally: when re.sub() is handed a replacement *string* it
# interprets backslash escapes and group references (e.g. "\1", "\n",
# "\g<name>") inside it, which would corrupt wikitext containing backslashes.
# NOTE(review): assumes new_drugbox is a plain string -- confirm.
new_text = regexp_drug_infobox.sub(lambda match: new_drugbox, text)
# Save the page as a minor edit with a fixed edit summary.
page.put(new_text, comment='populated clinical fields in drugbox', watchArticle=None, minorEdit=True)
|