Yahoo! search interface
You are encouraged to solve this task according to the task description, using any language you may know.
Create a class for searching Yahoo results. It must implement a Next Page method, and read URL, Title and Content from results.
C#
<lang csharp>using System; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic; using System.Linq;
/// <summary>
/// Screen-scrapes a single page of Yahoo! web search results for a query.
/// </summary>
/// <remarks>
/// The page is downloaded synchronously in the constructor. <see cref="NextPage"/>
/// and <see cref="GetPage"/> return new instances rather than mutating this one.
/// </remarks>
class YahooSearch {
    private const string SearchUrl = "http://search.yahoo.com/search?p=";
    private const int ResultsPerPage = 10;

    // Capture groups: 1 = title, 2 = abstract, 3 = displayed URL.
    // NOTE(review): the markup between the groups is assumed from Yahoo's legacy
    // result layout; confirm against a live page before relying on it.
    private static readonly Regex ResultPattern = new Regex(
        "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>" +
        "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>");

    // Captures the total from text of the form "<x> of <total> for".
    private static readonly Regex TotalPattern = new Regex(".+? of (.+?) for");

    // Any HTML tag, e.g. the <b> / <wbr /> highlighting inside a result.
    private static readonly Regex TagPattern = new Regex("<[^>]+>");

    private readonly string query;
    private readonly string content;
    private readonly int page = 1;

    /// <summary>Fetches the first page of results for <paramref name="query"/>.</summary>
    /// <param name="query">Search terms; escaped before being placed in the URL.</param>
    public YahooSearch(string query) {
        this.query = query;
        this.content = Download(SearchUrl + Escape(query));
    }

    /// <summary>Fetches the given 1-based page of results for <paramref name="query"/>.</summary>
    /// <param name="query">Search terms; escaped before being placed in the URL.</param>
    /// <param name="page">1-based page number; page N starts at result (N - 1) * 10 + 1.</param>
    public YahooSearch(string query, int page) {
        this.query = query;
        this.page = page;
        int firstResult = ((this.page - 1) * ResultsPerPage) + 1;
        this.content = Download(SearchUrl + Escape(query) + "&b=" + firstResult);
    }

    /// <summary>Total number of results reported by the downloaded page.</summary>
    /// <exception cref="FormatException">
    /// The "of N for" text was not found or N is not a whole number.
    /// </exception>
    public long Length {
        get {
            string total = TotalPattern.Match(this.content).Groups[1].Value.Replace(",", "");
            return long.Parse(total, System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// Results parsed from the downloaded page; an empty array when nothing matches.
    /// </summary>
    public YahooResult[] Results {
        get {
            List<YahooResult> results = new List<YahooResult>();
            foreach (Match match in ResultPattern.Matches(this.content)) {
                string url = StripMarkup(match.Groups[3].Value);
                string title = StripMarkup(match.Groups[1].Value);
                // Abstracts are truncated with ellipses; drop them.
                string body = StripMarkup(match.Groups[2].Value).Replace("...", "");
                results.Add(new YahooResult(url, title, body));
            }
            return results.ToArray();
        }
    }

    /// <summary>Downloads and returns the page after this one.</summary>
    public YahooSearch NextPage() { return new YahooSearch(this.query, this.page + 1); }

    /// <summary>Downloads and returns the given 1-based page for the same query.</summary>
    public YahooSearch GetPage(int page) { return new YahooSearch(this.query, page); }

    // Percent-encodes the query so spaces, '&', '#', ... cannot corrupt the URL.
    // A null query is sent as an empty one, as plain concatenation would do.
    private static string Escape(string query) {
        return Uri.EscapeDataString(query ?? string.Empty);
    }

    // Downloads a page as text, releasing the WebClient afterwards.
    private static string Download(string url) {
        using (WebClient client = new WebClient()) {
            return client.DownloadString(url);
        }
    }

    // Removes HTML tags and &nbsp; entities from a captured fragment.
    // string.Replace throws ArgumentException for an empty search string, so
    // every removal here uses a non-empty pattern.
    private static string StripMarkup(string html) {
        return TagPattern.Replace(html, "").Replace("&nbsp;", "");
    }
}
/// <summary>
/// One search hit: its URL, title and content text.
/// </summary>
class YahooResult {
    /// <summary>URL of the hit.</summary>
    public string URL { get; set; }

    /// <summary>Title of the hit.</summary>
    public string Title { get; set; }

    /// <summary>Content text of the hit.</summary>
    public string Content { get; set; }

    /// <summary>Creates a result from its three text fields.</summary>
    /// <param name="url">Value for <see cref="URL"/>.</param>
    /// <param name="title">Value for <see cref="Title"/>.</param>
    /// <param name="content">Value for <see cref="Content"/>.</param>
    public YahooResult(string url, string title, string content) {
        URL = url;
        Title = title;
        Content = content;
    }
}
// Usage:
/// <summary>
/// Sample program: searches for "test" and prints the title of every result
/// on the first page, one per line.
/// </summary>
class Prog {
    static void Main() {
        YahooSearch search = new YahooSearch("test");
        // Read the property once, exactly as a foreach over it would.
        YahooResult[] hits = search.Results;
        for (int i = 0; i < hits.Length; i++) {
            Console.WriteLine(hits[i].Title);
        }
    }
}</lang>
Python
<lang python>import re
import urllib
def fix(x):
    """Return the plain text of an HTML fragment captured from a result page.

    The fragment is cut at the first ``</a>`` when one is present, then the
    highlighting tags (``<b>``, ``</b>``, ``<wbr />``, ``<wbr>``), ``&nbsp;``
    entities and ``...`` ellipses are removed. Ordinary spaces are kept.
    """
    # str.find returns -1 when "</a>" is absent; slicing with -1 would
    # silently drop the last character, so only cut on a real hit.
    end = x.find("</a>")
    if end != -1:
        x = x[:end]
    for junk in ("<b>", "</b>", "<wbr />", "<wbr>", "&nbsp;", "..."):
        x = x.replace(junk, "")
    return x
class YahooSearch:
    """One page of Yahoo! web search results, downloaded on construction.

    ``results`` yields the parsed YahooResult objects for this page and
    ``nextpage`` a new YahooSearch for the following page.
    """

    # Capture groups: title, abstract, displayed URL.
    # NOTE(review): the markup between the groups is assumed from Yahoo's
    # legacy result layout; confirm against a live page.
    _RESULT_RE = re.compile(
        "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"
        "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>")

    def __init__(self, query, page=1):
        """Download result page `page` (1-based) for `query`."""
        self.query = query
        self.page = page
        # quote_plus keeps spaces, '&', '#', ... in the query from corrupting
        # the URL; page N starts at result (N - 1) * 10 + 1.
        self.url = "http://search.yahoo.com/search?p=%s&b=%s" % (
            urllib.quote_plus(self.query), (self.page - 1) * 10 + 1)
        self.content = urllib.urlopen(self.url).read()

    def getresults(self):
        """Parse the downloaded page into a list of YahooResult objects."""
        # Build a local list: assigning to self.results would collide with
        # the `results` property defined below.
        found = []
        for title, content, url in self._RESULT_RE.findall(self.content):
            found.append(YahooResult(fix(title), fix(content), fix(url)))
        return found

    def getnextpage(self):
        """Download and return the page after this one."""
        return YahooSearch(self.query, self.page + 1)

    results = property(fget=getresults)
    nextpage = property(fget=getnextpage)
class YahooResult:
    """Plain record for one search hit: its title, content and url."""

    def __init__(self, title, content, url):
        """Store the three values unchanged on the instance."""
        self.title = title
        self.content = content
        self.url = url
- Usage:
x = YahooSearch("test")
for result in x.results:
print result.title</lang>