User talk:Paddy3118: Difference between revisions

← Older edit Newer edit →

Content deleted Content added

Inline

@@ Line 256: / Line 256: @@
 I did like writing the story around that Knapsack task.
+==Vanity Search Updated==
+After the 4th of July RC updates, my RC Vanity Searcher needed to be updated due to HTML changes. I decided to add a modification: a column called '''+/-''' that shows how far each page has moved between the order in which it was created and its order by maximum views.
+<lang python>
+'''
+Rosetta Code Vanity search:
+    How many new pages has someone created?
+'''
+import urllib, re, pdb  # NOTE(review): pdb is not referenced anywhere in the visible script
+user = 'Paddy3118'
+site = 'http://www.rosettacode.org'
+nextpage = site + '/wiki/Special:Contributions/' + user  # URL of the first contributions listing page
+nextpage_re = re.compile(
+    #r'<a href="([^"]+)" title="[^"]+" rel="next">older '
+    r'<a href="([^"]+)" title="[^"]+" rel="next"[^>]*>older '
+    )
+newpages = []  # titles extracted from lines carrying the "newpage" span
+pagecount = 0  # number of listing pages downloaded so far
+while nextpage:
+    page = urllib.urlopen(nextpage)
+    pagecount +=1
+    nextpage = ''  # stays empty (ending the while loop) unless an "older" link is found below
+    for line in page:
+        if not nextpage:
+            # Look for the "older" link; only the first match per downloaded page is used
+            nextpage_match = re.search(nextpage_re, line)
+            if nextpage_match:
+                nextpage = (site + nextpage_match.groups()[0]).replace('&amp;', '&')  # prefix captured href with site, undo &amp; escaping
+                #print nextpage
+                npline=line  # NOTE(review): npline is never read in the visible script; presumably a debugging aid
+        if '<span class="newpage">' in line:
+            # Page name is the text between the first ' title="' on the line and the next '"'
+            newpages.append(line.partition(' title="')[2].partition('"')[0])
+    page.close()
+nontalk = [p for p in newpages if not ':' in p]  # keep only titles without a ':' (drops "Talk:"-style names)
+nontalk.reverse()  # presumably the listing is newest-first, so this yields creation order - TODO confirm
+print "User: %s has created %i new pages of which %i were not Talk: pages, from approx %i edits" % (
+    user, len(newpages), len(nontalk), pagecount*50 )  # edit count is an estimate: assumes 50 edits per listing page
+print "New pages created, in order, are:\n ",  # trailing comma: no newline, so the first title follows on the same line
+print "\n  ".join(nontalk)
+nextpage = site + '/w/index.php?title=Special:PopularPages'  # first popular-pages listing page; site is set earlier in the script
+nextpage_re = re.compile(
+    #r'<a href="([^"]+)" class="mw-nextlink">next '
+    r'<a href="([^"]+)"[^>]* class="mw-nextlink"[^>]*>next'
+    )
+data_re = re.compile(
+    r'^<li><a href="[^"]+" title="([^"]+)".*</a>.*\(([0-9,]+) views\)' )  # captures (title, comma-grouped view count)
+title2rankviews = {}  # maps page title -> [rank, views]
+rank = 1  # running position, counted across all downloaded listing pages
+pagecount = 0  # number of listing pages downloaded so far
+while nextpage:
+    page = urllib.urlopen(nextpage)
+    pagecount +=1
+    nextpage = ''  # stays empty (ending the while loop) unless a "next" link is found below
+    for line in page:
+        if not nextpage:
+            # Look for the "next" link; only the first match per downloaded page is used
+            nextpage_match = re.search(nextpage_re, line)
+            if nextpage_match:
+                nextpage = (site + nextpage_match.groups()[0]).replace('&amp;', '&')  # prefix captured href with site, undo &amp; escaping
+                # print nextpage
+                npline=line  # NOTE(review): npline is never read in the visible script; presumably a debugging aid
+        datamatch = re.search(data_re, line)
+        if datamatch:
+            title, views = datamatch.groups()
+            views = int(views.replace(',', ''))  # strip thousands separators before converting
+            title2rankviews[title] = [rank, views]  # rank is the order in which entries appear in the listing
+            rank += 1
+    page.close()
+print "\n\n Highest page Ranks for user pages:\n"
+fmt = "  %-4s %-6s %-3s %s" # header row: rank, views, +/-, title
+print fmt % ('RANK', 'VIEWS', '+/-', 'TITLE')
+fmt = "  %4s %6s %+3i %s" # data rows: rank, views, signed +/-, title
+highrank = [title2rankviews.get(t,[99999, 0]) + [t] for t in nontalk]  # [rank, views, title]; titles not in the listing get rank 99999, 0 views
+highrank.sort()  # list comparison: ascending by rank, then views, then title
+for i,x in enumerate(highrank):
+    rank, view, title = x
+    movement = nontalk.index(title) - i  # position in nontalk minus position by rank; positive = placed earlier by rank
+    print fmt % (rank, view, movement, title)
+</lang>
 == Poly thanks ==