Revision as of 07:06, 23 July 2011 edit Boghog (talk \| contribs) Autopatrolled, Extended confirmed users, IP block exemptions, New page reviewers, Pending changes reviewers, Rollbackers, Template editors 142,854 edits removed duplicate code ← Previous edit		Revision as of 20:35, 24 July 2011 edit undo Boghog (talk \| contribs) Autopatrolled, Extended confirmed users, IP block exemptions, New page reviewers, Pending changes reviewers, Rollbackers, Template editors 142,854 edits updated to include code to generate the {{drugs.com}} template Next edit →
Line 131: import csv import string # Included for bot exclusion compliance (see http://en.wikipedia.org/wiki/Template:Bots)▼ ~~# compiled regular expression~~ user = "BogBot" Line 144 ⟶ 139: regexp_drug_infobox = re.compile(r"\{\{\s(Drugbox\|drugbox)\s(?P<PARAMS>.+)\s\}\}\s", re.DOTALL) regexp_param = re.compile(r"^\s?\\|\s?(?P<PARAM>\S+)\s?=\s?(?P<VALUE>.+)$") ▼ ▲# Included for bot exclusion compliance (see http://en.wikipedia.org/wiki/Template:Bots) ▲ def Allowbots(text): if (regexp_ab.search(text)): Line 159 ⟶ 155: drug_data = csv.reader(open('/Users/BogBot/progs/pywikipedia/drugbox/drug_links_agumented.csv', 'rU'), dialect='excel') # drugs.com root links: roots = [("monograph","http://www.drugs.com/monograph/"), ("CDI","http://www.drugs.com/cdi/"), ("CONS","http://www.drugs.com/cons/"), ("MTM","http://www.drugs.com/mtm/"), ("parent","http://www.drugs.com/")] for row in drug_data: Line 195: result_drug_infobox = regexp_drug_infobox.search(text) if result_drug_infobox: # print "found it!" parameters = result_drug_infobox.group('PARAMS') current_parameters = {} Line 208: current_parameters[parameter] = value # print "INN: ", INN if INN in drugbank_data: data = drugbank_data[INN] Line 228: # print "merck tradenames: ", merck_tradenames # print "current tradenames: ", current_tradenames # print "new tradenames: ", current_parameters['tradename'] # test web page, returns "200" if OK: Line 246: # \| Drugs.com = <!-- link to Drugs.com monograph, e.g., "lisinopril" that links to "http://www.drugs.com/monograph/lisinopril.html" --> stems = [] INN_html = string.lower(string.replace(INN, " ", "_"))▼ drugnames = [] ~~link = "http://www.drugs.com/monograph/" + INN_html + ".html"~~ drugnames.append(INN) if urllib.urlopen(link).getcode() == 200: # test link status to make sure it is good before assigning parameter▼ for tradename in new_tradenames: current_parameters['Drugs.com'] = INN_html▼ drugnames.append(tradename) ~~# print "Drugs.com: ", current_parameters['Drugs.com']~~ for drugname in drugnames: drugname = string.lower(drugname) if (string.find(tradename, " ") > 0): ▲ ~~INN_html~~ = ~~string~~ stems.~~lower~~append(string.replace(~~INN~~drugname, " ", "_")) stems.append(string.replace(drugname, " ", "-")) else: stems.append(drugname) try: for root in roots: for stem in stems: link = root[1] + stem + ".html" # print "attempted Drugs.com link: ", link ▲ if urllib.urlopen(link).getcode() == 200: # test link status to make sure it is good before assigning parameter current_parameters['Drugs.com'] = "{{drugs.com\|" + root[0] + "\|" + stem + "}}" raise StopIteration() ▲# print "Drugs.com: ", current_parameters['Drugs.com'] ~~= INN_html~~ except StopIteration: pass # for parameter, value in current_parameters.iteritems(): Line 407 ⟶ 427: # replace original drugbox with new drugbox new_text = re.sub(regexp_drug_infobox, new_drugbox, text) # print new_text page.put(new_text, comment='populated clinical fields in drugbox', watchArticle = None, minorEdit = True)

User:BogBot/Source code/Task 03: Difference between revisions