Yahoo! search interface: Difference between revisions
(fixed C# enhanced results) |
Underscore (talk | contribs) (Added Perl.) |
||
Line 85: | Line 85: | ||
} |
} |
||
}</lang> |
}</lang> |
||
=={{header|Perl}}== |
|||
<lang perl>package YahooSearch; |
|||
use Encode; |
|||
use HTTP::Cookies; |
|||
use WWW::Mechanize; |
|||
# --- Internals ------------------------------------------------- |
|||
sub apply (&$) |
|||
{my $f = shift; local $_ = shift; $f->(); return $_;} |
|||
# We construct a cookie to get 100 results per page and prevent |
|||
# "enhanced results". |
|||
my $search_prefs = 'v=1&n=100&sm=' . |
|||
apply {s/([^a-zA-Z0-9])/sprintf '%%%02X', ord $1/ge} |
|||
join '|', |
|||
map {'!' . $_} |
|||
qw(hsb Zq0 XbM sss dDO VFM RQh uZ0 Fxe yCl GP4 FZK yNC mEG niH); |
|||
my $cookies = HTTP::Cookies->new; |
|||
$cookies->set_cookie(0, 'sB', $search_prefs, '/', 'search.yahoo.com'); |
|||
my $mech = new WWW::Mechanize |
|||
(cookie_jar => $cookies, |
|||
stack_depth => 0); |
|||
sub read_page |
|||
{my ($next, $page, @results) = |
|||
($mech->find_link(text => 'Next >')->url, |
|||
decode 'iso-8859-1', $mech->content); |
|||
while ($page =~ m |
|||
{<h3> <a \s class="yschttl \s spt" \s |
|||
href=" ([^"]+) " \s* > #" |
|||
(.+?) </a> |
|||
.+? |
|||
<div \s class="abstr"> |
|||
(.+?) </div>}xg) |
|||
{push @results, {url => $1, title => $2, content => $3}; |
|||
foreach ( @{$results[-1]}{qw(title content)} ) |
|||
{s/<.+?>//g; |
|||
$_ = encode 'utf8', $_;}} |
|||
return $next, \@results;} |
|||
# --- Methods --------------------------------------------------- |
|||
sub new |
|||
{my $invocant = shift; |
|||
my $class = ref($invocant) || $invocant; |
|||
$mech->get('http://search.yahoo.com/search?p=' . apply |
|||
{s/([^a-zA-Z0-9 ])/sprintf '%%%02X', ord $1/ge; |
|||
s/ /+/g;} |
|||
shift); |
|||
my ($next, $results) = read_page(); |
|||
return bless {link_to_next => $next, results => $results}, $class;} |
|||
sub results |
|||
{@{shift()->{results}};} |
|||
sub next_page |
|||
{my $invocant = shift; |
|||
my $next = $invocant->{link_to_next}; |
|||
unless ($next) |
|||
{$invocant->{results} = []; |
|||
return undef;} |
|||
$mech->get($next); |
|||
($next, my $results) = read_page(); |
|||
$invocant->{link_to_next} = $next; |
|||
$invocant->{results} = $results; |
|||
return 1;}</lang> |
|||
=={{header|Python}}== |
=={{header|Python}}== |
Revision as of 21:00, 12 May 2009
You are encouraged to solve this task according to the task description, using any language you may know.
Create a class for searching Yahoo results. It must implement a Next Page method, and read URL, Title and Content from results.
C#
<lang csharp>using System; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic; using System.Linq;
// Downloads one page of Yahoo! search results for a query and exposes the hits
// parsed out of the returned HTML.
// NOTE(review): some string literals below (the empty Replace("") arguments and
// the Regex pattern that spans three lines) look like they lost embedded HTML
// when this listing was extracted; confirm them against the original revision.
class YahooSearch {
// query: the search terms as passed in; content: the body downloaded by the
// constructor; page: result page number, 1 unless given to the constructor.
private string query; private string content; private int page = 1;
// Fetches the first results page. The query is appended to the URL as-is
// (no URL-encoding is applied here).
public YahooSearch(string query) { this.query = query; this.content = new WebClient().DownloadString("http://search.yahoo.com/search?p=" + query); }
// Fetches the given results page; the "b" parameter is set to
// (page - 1) * 10 + 1 -- presumably the 1-based index of the first hit at ten
// hits per page (TODO confirm).
public YahooSearch(string query, int page) { this.query = query; this.page = page; this.content = new WebClient().DownloadString(String.Format("http://search.yahoo.com/search?p={0}&b={1}", query, ((this.page - 1) * 10) + 1)); }
// Removes the substrings listed in the Replace chain from x, then, if "</a>"
// occurs at an index greater than 0, returns only the text before it.
string Fix(string x) { x = x.Replace("", "").Replace("", "").Replace("", "").Replace(" ", "").Replace("...", "");
int i = x.IndexOf("</a>");
if (i > 0) return x.Substring(0, i); else return x; }
// Runs the pattern over the downloaded content and builds one YahooResult per
// match: group 3 is used as the URL, group 1 as the title, group 2 as the content.
public YahooResult[] Results { get { ArrayList results = new ArrayList();
foreach (Match e in new Regex("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a>(?(
))
(.+?)").Matches(this.content)) {
string rurl = Fix(e.Groups[3].Value); string rtitle = Fix(e.Groups[1].Value); string rcontent = Fix(e.Groups[2].Value); results.Add(new YahooResult(rurl, rtitle, rcontent)); } return (YahooResult[])results.ToArray(typeof(YahooResult)); } }
// Returns a new YahooSearch for the same query on the following page
// (constructing it triggers a fresh download).
public YahooSearch NextPage() { return new YahooSearch(this.query, this.page + 1); }
// Returns a new YahooSearch for the same query on an arbitrary page.
public YahooSearch GetPage(int page) { return new YahooSearch(this.query, page); }
}
// Plain data holder for a single search hit: its URL, title and content snippet.
class YahooResult {
    public string URL { get; set; }
    public string Title { get; set; }
    public string Content { get; set; }

    // Stores the three values unchanged in the corresponding properties.
    public YahooResult(string url, string title, string content) {
        URL = url;
        Title = title;
        Content = content;
    }
}
// Usage:
class Prog {
    // Entry point: searches for "test" and prints the title of every hit
    // found on the first results page.
    static void Main() {
        YahooSearch search = new YahooSearch("test");
        foreach (YahooResult hit in search.Results) {
            Console.WriteLine(hit.Title);
        }
    }
}</lang>
Perl
<lang perl>package YahooSearch;
use Encode; use HTTP::Cookies; use WWW::Mechanize;
# --- Internals -------------------------------------------------
# apply BLOCK VALUE
#
# Runs BLOCK with $_ set to a copy of VALUE and returns whatever $_ holds
# afterwards.  The (&$) prototype is what lets callers write
# "apply { s/a/b/ } $string" without the sub keyword or a comma.
sub apply (&$) {
    my ($code, $value) = @_;
    local $_ = $value;    # localized so the caller's $_ is left untouched
    $code->();
    return $_;
}
# We construct a cookie to get 100 results per page and prevent
# "enhanced results".
# Value of the "sB" preference cookie.  Per the note accompanying this code it
# asks for 100 results per page and prevents "enhanced results"; the "sm" part
# is a '|'-joined list of '!'-prefixed codes with every non-alphanumeric
# character percent-encoded.
my $search_prefs = 'v=1&n=100&sm=' .
    apply {s/([^a-zA-Z0-9])/sprintf '%%%02X', ord $1/ge}
    join '|',
    map {'!' . $_}
    qw(hsb Zq0 XbM sss dDO VFM RQh uZ0 Fxe yCl GP4 FZK yNC mEG niH);

my $cookies = HTTP::Cookies->new;
$cookies->set_cookie(0, 'sB', $search_prefs, '/', 'search.yahoo.com');

# One user agent shared by every sub in this file.  Constructed with direct
# method-call syntax rather than the ambiguous indirect-object form
# ("new WWW::Mechanize (...)").
my $mech = WWW::Mechanize->new(
    cookie_jar  => $cookies,
    stack_depth => 0,
);
# Parses the page currently loaded in $mech.
#
# Returns a two-element list:
#   * the URL of the "Next >" link, or undef when the page has no such link;
#   * a reference to an array of hashes, one per search hit, each holding the
#     keys url, title and content (title and content have markup removed and
#     are UTF-8 encoded).
sub read_page {
    # find_link yields undef when nothing matches, so the ->url call must be
    # guarded; next_page relies on a false link_to_next to detect the last page.
    my $next_link = $mech->find_link(text => 'Next >');
    my $next      = $next_link ? $next_link->url : undef;

    my $page = decode 'iso-8859-1', $mech->content;
    my @results;

    # Each hit is an <h3> heading wrapping the title link, followed later by a
    # <div class="abstr"> that holds the abstract.  Without those anchors the
    # final lazy group would capture a single character.
    while ($page =~ m{
            <h3> <a \s class="yschttl \s spt" \s
            href=" ([^"]+) " \s* >              #"
            (.+?) </a>
            .+?
            <div \s class="abstr">
            (.+?) </div>
        }xg)
    {
        push @results, {url => $1, title => $2, content => $3};

        # Strip inline markup from the text fields and hand them out as UTF-8.
        foreach ( @{$results[-1]}{qw(title content)} ) {
            s/<.+?>//g;
            $_ = encode 'utf8', $_;
        }
    }

    return $next, \@results;
}
# --- Methods ---------------------------------------------------
# YahooSearch->new($query)
#
# Requests the first results page for $query and returns an object holding
# that page's results together with the link to the following page.  In the
# query, every character other than an ASCII letter, digit or space is
# percent-encoded and each space becomes '+'.
sub new {
    my ($invocant, $query) = @_;
    my $class = ref($invocant) || $invocant;

    my $escaped = apply {
        s/([^a-zA-Z0-9 ])/sprintf '%%%02X', ord $1/ge;
        s/ /+/g;
    } $query;
    $mech->get('http://search.yahoo.com/search?p=' . $escaped);

    my ($next, $results) = read_page();
    return bless {link_to_next => $next, results => $results}, $class;
}
# $search->results
#
# Returns the list of result hashes stored for the current page (in scalar
# context, how many there are).
sub results {
    my ($self) = @_;
    return @{ $self->{results} };
}
# $search->next_page
#
# Advances the object to the following results page.  When no link to a next
# page is stored, the results are emptied and undef is returned; otherwise the
# linked page is fetched, the stored results and next-page link are replaced,
# and 1 is returned.
sub next_page {
    my ($self) = @_;
    my $url = $self->{link_to_next};

    if (!$url) {
        $self->{results} = [];
        return undef;
    }

    $mech->get($url);
    # read_page returns (next-page URL, results array ref), in that order.
    @{$self}{qw(link_to_next results)} = read_page();
    return 1;
}</lang>
Python
<lang python>import urllib
import re
# Remove the substrings listed in the replace() chain from x, then return the
# part of x that precedes the marker given to find().
# NOTE(review): the empty-string replace() arguments and the marker literal that
# breaks across two lines look like HTML lost in extraction -- confirm against
# the original. Also note that find() returns -1 when the marker is absent, in
# which case the slice drops the last character of x.
def fix(x):
x = x.replace("","").replace("","").replace(" ","").replace(" ","").replace("...","")
return x[:x.find("</a>
")]
# Fetches one page of Yahoo! search results for a query and parses its hits.
# NOTE(review): the statements on the long lines below have lost their line
# breaks and indentation, and the regular expression appears to be missing
# part of its pattern (only two groups are visible but i[2] is read) --
# restore from the original revision before use.
class YahooSearch:
# __init__ builds the search URL from query and page (b = (page - 1) * 10 + 1)
# and downloads it with urllib.urlopen; getresults starts from an empty
# self.results list and fills it from the matches found in self.content.
def __init__(self, query, page=1): self.query = query self.page = page self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1)) self.content = urllib.urlopen(self.url).read() def getresults(self): self.results = []
for i in re.findall("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a>"+\ "
(.+?)",self.content):
# Each match supplies title, content and url in that order, each passed
# through fix(); getnextpage returns a new YahooSearch for page + 1. Both
# getters are exposed through property(fget=...) as results and nextpage.
title = fix(i[0]) content = fix(i[1]) url = fix(i[2]) self.results.append(YahooResult(title, content, url)) return self.results def getnextpage(self): return YahooSearch(self.query, self.page+1) results = property(fget=getresults) nextpage = property(fget=getnextpage)
class YahooResult:
    """One search hit: its title, content snippet and URL."""

    def __init__(self, title, content, url):
        # One assignment per line; the collapsed single-line form was not
        # valid Python.
        self.title = title
        self.content = content
        self.url = url
# Usage:
x = YahooSearch("test")
for result in x.results:
print result.title</lang>