Jump to content

User talk:Paddy3118: Difference between revisions

Update to Vanity Searcher
(I have broken into the top 100 list of page views!!)
(Update to Vanity Searcher)
Line 256:
 
I did like writing the story around that Knapsack task.
 
 
==Vanity Search Updated==
After the 4th of July RC updates, my RC Vanity Searcher needed to be updated due to HTML changes. I also decided to add a new column, called '''+/-''', that shows the movement between the order in which a page was created and its position in the order of maximum views.
 
<lang python>
'''
Rosetta Code Vanity search:
How many new pages has someone created?
'''
 
import urllib, re, pdb
 
user = 'Paddy3118'
 
site = 'http://www.rosettacode.org'
nextpage = site + '/wiki/Special:Contributions/' + user
nextpage_re = re.compile(
#r'<a href="([^"]+)" title="[^"]+" rel="next">older '
r'<a href="([^"]+)" title="[^"]+" rel="next"[^>]*>older '
)
 
newpages = []
pagecount = 0
while nextpage:
page = urllib.urlopen(nextpage)
pagecount +=1
nextpage = ''
for line in page:
if not nextpage:
# Search for URL to next page of results for download
nextpage_match = re.search(nextpage_re, line)
if nextpage_match:
nextpage = (site + nextpage_match.groups()[0]).replace('&amp;', '&')
#print nextpage
npline=line
if '<span class="newpage">' in line:
# extract N page name from title
newpages.append(line.partition(' title="')[2].partition('"')[0])
page.close()
 
nontalk = [p for p in newpages if not ':' in p]
nontalk.reverse()
 
print "User: %s has created %i new pages of which %i were not Talk: pages, from approx %i edits" % (
user, len(newpages), len(nontalk), pagecount*50 )
print "New pages created, in order, are:\n ",
print "\n ".join(nontalk)
 
 
nextpage = site + '/w/index.php?title=Special:PopularPages'
nextpage_re = re.compile(
#r'<a href="([^"]+)" class="mw-nextlink">next '
r'<a href="([^"]+)"[^>]* class="mw-nextlink"[^>]*>next'
)
 
data_re = re.compile(
r'^<li><a href="[^"]+" title="([^"]+)".*</a>.*\(([0-9,]+) views\)' )
 
title2rankviews = {}
rank = 1
pagecount = 0
while nextpage:
page = urllib.urlopen(nextpage)
pagecount +=1
nextpage = ''
for line in page:
if not nextpage:
# Search for URL to next page of results for download
nextpage_match = re.search(nextpage_re, line)
if nextpage_match:
nextpage = (site + nextpage_match.groups()[0]).replace('&amp;', '&')
# print nextpage
npline=line
datamatch = re.search(data_re, line)
if datamatch:
title, views = datamatch.groups()
views = int(views.replace(',', ''))
title2rankviews[title] = [rank, views]
rank += 1
page.close()
 
print "\n\n Highest page Ranks for user pages:\n"
fmt = " %-4s %-6s %-3s %s" # rank, views, +/- title
print fmt % ('RANK', 'VIEWS', '+/-', 'TITLE')
fmt = " %4s %6s %+3i %s" # rank, views, +/- title
highrank = [title2rankviews.get(t,[99999, 0]) + [t] for t in nontalk]
highrank.sort()
for i,x in enumerate(highrank):
rank, view, title = x
movement = nontalk.index(title) - i
print fmt % (rank, view, movement, title)
</lang>
 
== Poly thanks ==
Anonymous user
Cookies help us deliver our services. By using our services, you agree to our use of cookies.