Yahoo! search interface: Difference between revisions
(added python) |
(fixed enchanced results) |
||
Line 94: | Line 94: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
{{incorrect|Python|This example is working perfectly, but "enhanced results" are not working.}} |
|||
<lang python>import urllib |
<lang python>import urllib |
||
import re |
import re |
||
def fix(x): |
|||
⚫ | |||
return x[:x.find("</a></h3></div>")] |
|||
class YahooSearch: |
class YahooSearch: |
||
def __init__(self, query, page=1): |
def __init__(self, query, page=1): |
||
Line 106: | Line 108: | ||
self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1)) |
self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1)) |
||
self.content = urllib.urlopen(self.url).read() |
self.content = urllib.urlopen(self.url).read() |
||
def getresults(self): |
def getresults(self): |
||
self.results = [] |
self.results = [] |
||
for i in re.findall("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"+\ |
for i in re.findall("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"+\ |
||
"<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>",self.content): |
"<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>",self.content): |
||
⚫ | |||
title = fix(i[0]) |
|||
title = i[0].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","") |
|||
content = i[1] |
content = fix(i[1]) |
||
⚫ | |||
⚫ | |||
self.results.append(YahooResult(title, content, url)) |
self.results.append(YahooResult(title, content, url)) |
||
⚫ | |||
return self.results |
return self.results |
||
def getnextpage(self): |
def getnextpage(self): |
||
return YahooSearch(self.query, self.page+1) |
return YahooSearch(self.query, self.page+1) |
||
results = property(fget=getresults) |
results = property(fget=getresults) |
||
nextpage = property(fget=getnextpage) |
nextpage = property(fget=getnextpage) |
||
class YahooResult: |
class YahooResult: |
||
def __init__(self,title,content,url): |
def __init__(self,title,content,url): |
||
Line 132: | Line 134: | ||
self.content = content |
self.content = content |
||
self.url = url |
self.url = url |
||
# Usage: |
# Usage: |
||
x = YahooSearch("test") |
x = YahooSearch("test") |
||
for result in x.results: |
for result in x.results: |
||
print result.title</lang> |
print result.title</lang> |
Revision as of 16:24, 5 May 2009
You are encouraged to solve this task according to the task description, using any language you may know.
Create a class for searching Yahoo results. It must implement a Next Page method, and read URL, Title and Content from results.
C#
<lang csharp>using System; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic; using System.Linq;
// Downloads one page of Yahoo search results for a query and exposes the parsed results.
// NOTE(review): the Regex pattern in Results and most Replace() arguments below look as if
// HTML tags were stripped from them when this page was extracted — confirm against the
// original source before relying on them.
class YahooSearch {
// Search terms, downloaded page body, and page number (1 unless the two-argument constructor sets it).
private string query; private string content; private int page = 1;
// Downloads the results page for `query` (no "b" offset parameter is sent).
public YahooSearch(string query) { this.query = query; this.content = new WebClient().DownloadString("http://search.yahoo.com/search?p=" + query); }
// Downloads results page `page`; the "b" URL parameter is set to ((page - 1) * 10) + 1.
public YahooSearch(string query, int page) { this.query = query; this.page = page; this.content = new WebClient().DownloadString(String.Format("http://search.yahoo.com/search?p={0}&b={1}", query, ((this.page - 1) * 10) + 1)); }
// Parses the text captured by ".+? of (.+?) for" in the page body as a long, after removing commas.
public long Length {
get {
return long.Parse(new Regex(".+? of (.+?) for").
Match(this.content).Groups[1].Value.Replace(",", ""));
}
}
// Builds one YahooResult per regex match in the downloaded page and also writes each URL to the console.
// NOTE(review): the code reads Groups[1]..Groups[3], but the pattern as shown has fewer capture groups — TODO confirm.
public YahooResult[] Results { get { ArrayList results = new ArrayList();
foreach (Match e in new Regex("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a>
(.+?)").Matches(this.content)) {
string rurl = e.Groups[3].Value. Replace("", "").Replace("", "").Replace("", ""). Replace(" ",""); string rtitle = e.Groups[1].Value. Replace("", "").Replace("", "").Replace(" ",""); string rcontent = e.Groups[2].Value. Replace("", "").Replace("", "").Replace("...", ""). Replace(" ","");
Console.WriteLine(rurl); results.Add(new YahooResult(rurl, rtitle, rcontent)); } return (YahooResult[])results.ToArray(typeof(YahooResult)); } }
// Returns a new search for the same query, one page further on (this triggers another download).
public YahooSearch NextPage() { return new YahooSearch(this.query, this.page + 1); }
// Returns a new search for the same query at the given page number.
public YahooSearch GetPage(int page) { return new YahooSearch(this.query, page); }
}
// Holds the URL, title and content text of a single search result.
class YahooResult {
public string URL { get; set; } public string Title { get; set; } public string Content { get; set; }
// Stores the three arguments in the corresponding properties (note the order: url, title, content).
public YahooResult(string url, string title, string content) { this.URL = url; this.Title = title; this.Content = content; }
}
// Usage:
// Example program: searches for "test" and prints the title of every result returned.
class Prog {
static void Main() { YahooSearch x = new YahooSearch("test");
foreach (YahooResult result in x.Results) { Console.WriteLine(result.Title); } }
}</lang>
Python
<lang python>import urllib import re
def fix(x):
    """Strip Yahoo's highlighting markup from a captured result fragment.

    Removes ``<b>``, ``</b>``, ``<wbr />`` and ``<wbr>`` tags, then cuts the
    text at the first ``</a></h3></div>`` if that marker is present.

    x -- raw string captured from the results page.
    Returns the cleaned string; input without the marker is returned whole.
    """
    # A separate pass for "<b>...</b>" is unnecessary: removing <b> and </b>
    # already reduces it to "...".
    for tag in ("<b>", "</b>", "<wbr />", "<wbr>"):
        x = x.replace(tag, "")
    end = x.find("</a></h3></div>")
    # find() returns -1 when the marker is absent; slicing with -1 would
    # silently drop the last character, so only cut on a real match.
    if end == -1:
        return x
    return x[:end]
class YahooSearch:
    """One page of Yahoo web-search results for a query.

    The page is downloaded when the object is created.  ``results`` parses
    the downloaded HTML; ``nextpage`` downloads the following page.
    """

    # One hit = title link, abstract <div>, displayed-URL <span>, in that
    # order.  The pattern is tied to the page markup it was written against.
    _RESULT_RE = re.compile(
        "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"
        "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>")

    def __init__(self, query, page=1):
        """Download results page ``page`` for ``query``.

        query -- search terms; placed in the URL exactly as given (the
                 caller is responsible for any URL escaping).
        page  -- page number; the ``b`` URL parameter is set to
                 (page - 1) * 10 + 1.
        """
        self.query = query
        self.page = page
        self.url = "http://search.yahoo.com/search?p=%s&b=%s" % (
            self.query, (self.page - 1) * 10 + 1)
        self.content = urllib.urlopen(self.url).read()

    def getresults(self):
        """Return a list of YahooResult objects parsed from ``self.content``."""
        # Use a local list: assigning to self.results would collide with the
        # read-only ``results`` property defined at the bottom of the class.
        found = []
        for title, content, url in self._RESULT_RE.findall(self.content):
            found.append(YahooResult(fix(title), fix(content), fix(url)))
        return found

    def getnextpage(self):
        """Return a new YahooSearch for the same query, one page further on."""
        return YahooSearch(self.query, self.page + 1)

    results = property(fget=getresults)
    nextpage = property(fget=getnextpage)
class YahooResult:
    """A single search hit: its title, content text and URL."""

    def __init__(self, title, content, url):
        """Store the three arguments as same-named attributes."""
        self.title = title
        self.content = content
        self.url = url

    def __repr__(self):
        """Return a constructor-style representation for debugging."""
        return "YahooResult(%r, %r, %r)" % (self.title, self.content, self.url)
# Usage:
x = YahooSearch("test")
for result in x.results:
print result.title</lang>