User talk:Paddy3118: Difference between revisions
Content deleted Content added
I have broken into the top 100 list of page views!! |
Update to Vanity Searcher |
||
Line 256: | Line 256: | ||
I did like writing the story around that Knapsack task. |
I did like writing the story around that Knapsack task. |
||
==Vanity Search Updated== |
|||
After the 4th of July RC updates, my RC Vanity Searcher needed to be updated due to HTML changes. I also decided to add a new column, called '''+/-''', that shows how far each page has moved between the order in which it was created and its order by maximum views. |
|||
<lang python> |
|||
''' |
|||
Rosetta Code Vanity search: |
|||
How many new pages has someone created? |
|||
''' |
|||
import urllib, re, pdb |
|||
user = 'Paddy3118' |
|||
site = 'http://www.rosettacode.org' |
|||
nextpage = site + '/wiki/Special:Contributions/' + user |
|||
nextpage_re = re.compile( |
|||
#r'<a href="([^"]+)" title="[^"]+" rel="next">older ' |
|||
r'<a href="([^"]+)" title="[^"]+" rel="next"[^>]*>older ' |
|||
) |
|||
newpages = [] |
|||
pagecount = 0 |
|||
while nextpage: |
|||
page = urllib.urlopen(nextpage) |
|||
pagecount +=1 |
|||
nextpage = '' |
|||
for line in page: |
|||
if not nextpage: |
|||
# Search for URL to next page of results for download |
|||
nextpage_match = re.search(nextpage_re, line) |
|||
if nextpage_match: |
|||
nextpage = (site + nextpage_match.groups()[0]).replace('&', '&') |
|||
#print nextpage |
|||
npline=line |
|||
if '<span class="newpage">' in line: |
|||
# extract N page name from title |
|||
newpages.append(line.partition(' title="')[2].partition('"')[0]) |
|||
page.close() |
|||
nontalk = [p for p in newpages if not ':' in p] |
|||
nontalk.reverse() |
|||
print "User: %s has created %i new pages of which %i were not Talk: pages, from approx %i edits" % ( |
|||
user, len(newpages), len(nontalk), pagecount*50 ) |
|||
print "New pages created, in order, are:\n ", |
|||
print "\n ".join(nontalk) |
|||
# Walk the wiki's popular-pages listing, recording each listed page's position
# and view count keyed by its title.
nextpage = site + '/w/index.php?title=Special:PopularPages'

# Captures the href of the "next" pagination link; [^>]* tolerates extra
# attributes on either side of class="mw-nextlink".
nextpage_re = re.compile(
    r'<a href="([^"]+)"[^>]* class="mw-nextlink"[^>]*>next'
    )

# Captures (title, view count) from one list entry; the count may contain
# thousands separators.
data_re = re.compile(
    r'^<li><a href="[^"]+" title="([^"]+)".*</a>.*\(([0-9,]+) views\)' )

title2rankviews = {}    # title -> [rank, views]
rank = 1                # running position across all downloaded pages
pagecount = 0           # number of listing pages downloaded

while nextpage:
    page = urllib.urlopen(nextpage)
    pagecount += 1
    nextpage = ''    # stays empty, ending the loop, unless a link is found
    try:
        for line in page:
            if not nextpage:
                # Search for URL to next page of results for download
                nextpage_match = nextpage_re.search(line)
                if nextpage_match:
                    # The href is HTML-escaped, so '&amp;' must be turned
                    # back into '&' before the URL can be requested.
                    href = nextpage_match.group(1)
                    nextpage = (site + href).replace('&amp;', '&')
            datamatch = data_re.search(line)
            if datamatch:
                title, views = datamatch.groups()
                views = int(views.replace(',', ''))
                title2rankviews[title] = [rank, views]
                rank += 1
    finally:
        # Release the connection even if reading the response fails.
        page.close()
|||
print "\n\n Highest page Ranks for user pages:\n" |
|||
fmt = " %-4s %-6s %-3s %s" # rank, views, +/- title |
|||
print fmt % ('RANK', 'VIEWS', '+/-', 'TITLE') |
|||
fmt = " %4s %6s %+3i %s" # rank, views, +/- title |
|||
highrank = [title2rankviews.get(t,[99999, 0]) + [t] for t in nontalk] |
|||
highrank.sort() |
|||
for i,x in enumerate(highrank): |
|||
rank, view, title = x |
|||
movement = nontalk.index(title) - i |
|||
print fmt % (rank, view, movement, title) |
|||
</lang> |
|||
== Poly thanks == |
== Poly thanks == |