Yahoo! search interface: Difference between revisions
No edit summary |
(added python) |
||
Line 2: | Line 2: | ||
Create a class for searching Yahoo results. |
Create a class for searching Yahoo results. |
||
It must implement a '''Next Page''' method, and read URL, Title and Content from |
It must implement a '''Next Page''' method, and read URL, Title and Content from results. |
||
=={{header|C sharp|C#}}== |
=={{header|C sharp|C#}}== |
||
{{incorrect|C sharp|This example is working perfectly, but "enhanced results" |
{{incorrect|C sharp|This example is working perfectly, but "enhanced results" are not working.}} |
||
<lang csharp>using System; |
<lang csharp>using System; |
||
Line 81: | Line 81: | ||
} |
} |
||
//Usage: |
// Usage: |
||
class Prog { |
class Prog { |
||
static void Main() { |
static void Main() { |
||
YahooSearch x = new YahooSearch("test"); |
YahooSearch x = new YahooSearch("test"); |
||
⚫ | |||
foreach (YahooResult result in x.Results) { |
|||
⚫ | |||
} |
|||
} |
} |
||
}</lang> |
}</lang> |
||
=={{header|Python}}== |
|||
{{incorrect|Python|This example is working perfectly, but "enhanced results" are not working.}} |
|||
<lang python>import urllib |
|||
import re |
|||
class YahooSearch: |
|||
def __init__(self, query, page=1): |
|||
self.query = query |
|||
self.page = page |
|||
self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1)) |
|||
self.content = urllib.urlopen(self.url).read() |
|||
def getresults(self): |
|||
self.results = [] |
|||
for i in re.findall("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"+\ |
|||
"<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>",self.content): |
|||
title = i[0].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","") |
|||
content = i[1].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","") |
|||
url = i[2].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","") |
|||
self.results.append(YahooResult(title, content, url)) |
|||
return self.results |
|||
def getnextpage(self): |
|||
return YahooSearch(self.query, self.page+1) |
|||
results = property(fget=getresults) |
|||
nextpage = property(fget=getnextpage) |
|||
class YahooResult: |
|||
def __init__(self,title,content,url): |
|||
self.title = title |
|||
self.content = content |
|||
self.url = url |
|||
# Usage: |
|||
x = YahooSearch("test") |
|||
for result in x.results: |
|||
print result.title</lang> |
Revision as of 22:13, 4 May 2009
You are encouraged to solve this task according to the task description, using any language you may know.
Create a class for searching Yahoo results. It must implement a Next Page method, and read URL, Title and Content from results.
C#
<lang csharp>using System; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic; using System.Linq;
/// <summary>
/// Downloads one page of Yahoo! web-search results for a query and scrapes
/// the individual result entries out of the returned HTML.
/// </summary>
class YahooSearch {
    // One result entry: group 1 = title, group 2 = abstract, group 3 = displayed URL.
    // NOTE(review): the pattern is tied to Yahoo's result-page markup and will
    // silently match nothing if that markup changes - confirm against a live page.
    private static readonly Regex ResultPattern = new Regex(
        "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>" +
        "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>");

    // Captures the total hit count from text of the form "... of 1,234 for ...".
    private static readonly Regex LengthPattern = new Regex(".+? of (.+?) for");

    private string query;
    private string content;
    private int page = 1;

    /// <summary>Fetches the first result page for <paramref name="query"/>.</summary>
    /// <param name="query">Search terms, unencoded.</param>
    public YahooSearch(string query) : this(query, 1) { }

    /// <summary>Fetches the given result page for <paramref name="query"/>.</summary>
    /// <param name="query">Search terms, unencoded.</param>
    /// <param name="page">1-based page number; each page starts 10 results after the previous one.</param>
    /// <exception cref="WebException">The download failed.</exception>
    public YahooSearch(string query, int page) {
        this.query = query;
        this.page = page;

        // Escape the query so spaces, '&', '#', etc. cannot corrupt the URL.
        string url = String.Format(
            "http://search.yahoo.com/search?p={0}&b={1}",
            Uri.EscapeDataString(query ?? String.Empty),
            ((page - 1) * 10) + 1);

        // WebClient is IDisposable; release it as soon as the page is downloaded.
        using (WebClient client = new WebClient()) {
            this.content = client.DownloadString(url);
        }
    }

    /// <summary>Total number of hits reported on the downloaded page.</summary>
    /// <exception cref="FormatException">
    /// The page does not contain a parsable "of N for" count.
    /// </exception>
    public long Length {
        get {
            string count = LengthPattern.Match(this.content).Groups[1].Value;
            return long.Parse(count.Replace(",", ""));
        }
    }

    /// <summary>
    /// The results found on the downloaded page, in page order.
    /// Empty when the page contains no recognizable entries.
    /// </summary>
    public YahooResult[] Results {
        get {
            List<YahooResult> results = new List<YahooResult>();

            foreach (Match entry in ResultPattern.Matches(this.content)) {
                string rurl = StripMarkup(entry.Groups[3].Value);
                string rtitle = StripMarkup(entry.Groups[1].Value);
                string rcontent = StripMarkup(entry.Groups[2].Value);

                results.Add(new YahooResult(rurl, rtitle, rcontent));
            }

            return results.ToArray();
        }
    }

    /// <summary>Fetches the page that follows this one.</summary>
    public YahooSearch NextPage() {
        return new YahooSearch(this.query, this.page + 1);
    }

    /// <summary>Fetches an arbitrary page of the same query.</summary>
    /// <param name="page">1-based page number.</param>
    public YahooSearch GetPage(int page) {
        return new YahooSearch(this.query, page);
    }

    // Removes the highlighting and word-break tags Yahoo embeds in result text.
    private static string StripMarkup(string text) {
        return text
            .Replace("<b>", "")
            .Replace("</b>", "")
            .Replace("<wbr />", "")
            .Replace("<wbr>", "");
    }
}
/// <summary>
/// A single search hit: the address shown for it, its headline and its abstract.
/// </summary>
class YahooResult {
    /// <summary>Creates a result from its three text fields.</summary>
    /// <param name="url">Address displayed for the hit.</param>
    /// <param name="title">Headline of the hit.</param>
    /// <param name="content">Abstract text shown under the headline.</param>
    public YahooResult(string url, string title, string content) {
        URL = url;
        Title = title;
        Content = content;
    }

    /// <summary>Address displayed for the hit.</summary>
    public string URL { get; set; }

    /// <summary>Headline of the hit.</summary>
    public string Title { get; set; }

    /// <summary>Abstract text shown under the headline.</summary>
    public string Content { get; set; }
}
// Usage:
class Prog {
    /// <summary>
    /// Demo entry point: searches for "test" and writes the title of every
    /// result on the first page to standard output, one per line.
    /// </summary>
    static void Main() {
        YahooResult[] hits = new YahooSearch("test").Results;

        for (int i = 0; i < hits.Length; i++) {
            Console.WriteLine(hits[i].Title);
        }
    }
}</lang>
Python
<lang python>import urllib
import re
class YahooSearch(object):
    """One page of Yahoo! web-search results for a query.

    The page is downloaded when the object is constructed; ``results`` and
    ``nextpage`` are read-only properties built on top of it.
    """

    # One result entry; the groups are (title, abstract, displayed URL).
    # NOTE(review): tied to Yahoo's result-page markup - confirm it still matches.
    _RESULT_RE = re.compile(
        "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"
        "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>")

    def __init__(self, query, page=1):
        """Download result page ``page`` (1-based) for ``query``.

        ``query`` is given unencoded; it is escaped before being placed in
        the URL.  Network errors from ``urlopen`` propagate to the caller.
        """
        # Local import shim so the class works on both Python 2 and Python 3.
        try:
            from urllib.parse import quote_plus
            from urllib.request import urlopen
        except ImportError:
            from urllib import quote_plus, urlopen

        self.query = query
        self.page = page
        # Each page holds 10 results; "b" is the 1-based index of the first one.
        self.url = "http://search.yahoo.com/search?p=%s&b=%s" % (
            quote_plus(self.query), (self.page - 1) * 10 + 1)

        response = urlopen(self.url)
        try:
            raw = response.read()
        finally:
            response.close()
        # Python 3 returns bytes; the str regex below needs text.
        if not isinstance(raw, str):
            raw = raw.decode("utf-8", "replace")
        self.content = raw

    @staticmethod
    def _strip_markup(text):
        """Remove the highlighting and word-break tags embedded in result text."""
        for tag in ("<b>", "</b>", "<wbr />", "<wbr>"):
            text = text.replace(tag, "")
        return text

    def getresults(self):
        """Return the page's results as a list of ``YahooResult`` objects.

        The list is empty when no entry in ``self.content`` matches.
        """
        # Build a local list: ``results`` is a read-only property, so
        # assigning to ``self.results`` fails on new-style classes.
        found = []
        for title, content, url in self._RESULT_RE.findall(self.content):
            found.append(YahooResult(self._strip_markup(title),
                                     self._strip_markup(content),
                                     self._strip_markup(url)))
        return found

    def getnextpage(self):
        """Download and return the page that follows this one."""
        return YahooSearch(self.query, self.page + 1)

    results = property(fget=getresults)
    nextpage = property(fget=getnextpage)
class YahooResult(object):
    """A single search hit: its title, abstract text and displayed URL."""

    def __init__(self, title, content, url):
        """Store the three text fields of a result unchanged."""
        self.title = title
        self.content = content
        self.url = url
- Usage:
x = YahooSearch("test")
for result in x.results:
print result.title</lang>