Yahoo! search interface: Difference between revisions

From Rosetta Code
Content added Content deleted
(added python)
(fixed enchanced results)
Line 94: Line 94:


=={{header|Python}}==
=={{header|Python}}==

{{incorrect|Python|This examples is working perfectly, but "enchanced results" are not working.}}


<lang python>import urllib
<lang python>import urllib
import re
import re


def fix(x):
x = x.replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","")
return x[:x.find("</a></h3></div>")]
class YahooSearch:
class YahooSearch:
def __init__(self, query, page=1):
def __init__(self, query, page=1):
Line 106: Line 108:
self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1))
self.url = "http://search.yahoo.com/search?p=%s&b=%s" %(self.query, ((self.page - 1) * 10 + 1))
self.content = urllib.urlopen(self.url).read()
self.content = urllib.urlopen(self.url).read()
def getresults(self):
def getresults(self):
self.results = []
self.results = []
for i in re.findall("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"+\
for i in re.findall("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"+\
"<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>",self.content):
"<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>",self.content):
title = fix(i[0])
title = i[0].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","")
content = i[1].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","")
content = fix(i[1])
url = fix(i[2])
url = i[2].replace("<b>","").replace("</b>","").replace("<wbr />","").replace("<wbr>","").replace("<b>...</b>","")

self.results.append(YahooResult(title, content, url))
self.results.append(YahooResult(title, content, url))
return self.results
return self.results

def getnextpage(self):
def getnextpage(self):
return YahooSearch(self.query, self.page+1)
return YahooSearch(self.query, self.page+1)

results = property(fget=getresults)
results = property(fget=getresults)
nextpage = property(fget=getnextpage)
nextpage = property(fget=getnextpage)
class YahooResult:
class YahooResult:
def __init__(self,title,content,url):
def __init__(self,title,content,url):
Line 132: Line 134:
self.content = content
self.content = content
self.url = url
self.url = url

# Usage:
# Usage:

x = YahooSearch("test")
x = YahooSearch("test")

for result in x.results:
for result in x.results:
print result.title</lang>
print result.title</lang>

Revision as of 16:24, 5 May 2009

Task
Yahoo! search interface
You are encouraged to solve this task according to the task description, using any language you may know.

Create a class for searching Yahoo results. It must implement a Next Page method, and read URL, Title and Content from results.

C#

This example is incorrect. Please fix the code and remove this message.

Details: This example works perfectly, but "enhanced results" are not working.

<lang csharp>using System; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic; using System.Linq;

/// <summary>
/// Downloads one page of Yahoo! search results for a query and extracts the
/// title, abstract and displayed URL of every hit by scraping the HTML.
/// </summary>
class YahooSearch {

   private string query;
   private string content;
   private int page = 1;

   // Highlighting / word-break markup removed from every captured fragment.
   // The bold ellipsis is listed first: once the bare <b> and </b> tags are
   // gone it can no longer be recognised.
   // NOTE(review): these literals were lost when the page was extracted
   // (they had all collapsed to "", which makes String.Replace throw
   // ArgumentException); they are restored to match the Python entry on
   // this page. Confirm against the page history.
   private static readonly string[] Markup = {
       "<b>...</b>", "<b>", "</b>", "<wbr />", "<wbr>"
   };

   /// <summary>Fetches the first result page for <paramref name="query"/>.</summary>
   /// <param name="query">Search terms, inserted into the URL as given.</param>
   public YahooSearch(string query) {
       this.query = query;
       this.content = new WebClient().DownloadString("http://search.yahoo.com/search?p=" + query);
   }

   /// <summary>Fetches result page <paramref name="page"/> for <paramref name="query"/>.</summary>
   /// <param name="query">Search terms, inserted into the URL as given.</param>
   /// <param name="page">1-based page number; sent as offset (page - 1) * 10 + 1.</param>
   public YahooSearch(string query, int page) {
       this.query = query;
       this.page = page;
       this.content = new WebClient().DownloadString(String.Format("http://search.yahoo.com/search?p={0}&b={1}", query, ((this.page - 1) * 10) + 1));
   }

   /// <summary>
   /// The number captured by the pattern ".+? of (.+?) for" in the page,
   /// with thousands separators removed. long.Parse throws if the page
   /// contains no such text.
   /// </summary>
   public long Length {
       get {
           return long.Parse(new Regex(".+? of (.+?) for").
               Match(this.content).Groups[1].Value.Replace(",", ""));
       }
   }

   /// <summary>
   /// All hits found on the downloaded page. The page is re-parsed on every
   /// access, and each URL is echoed to the console as it is extracted.
   /// </summary>
   public YahooResult[] Results {
       get {
           List<YahooResult> results = new List<YahooResult>();

           // Groups: 1 = title, 2 = abstract, 3 = displayed URL.
           // NOTE(review): everything after the first </a> was stripped from
           // the scraped listing; restored from the Python entry's pattern.
           Regex hit = new Regex(
               "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>" +
               "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>");

           foreach (Match e in hit.Matches(this.content)) {
               string rurl = StripMarkup(e.Groups[3].Value);
               string rtitle = StripMarkup(e.Groups[1].Value);
               string rcontent = StripMarkup(e.Groups[2].Value);
               Console.WriteLine(rurl);
               results.Add(new YahooResult(rurl, rtitle, rcontent));
           }
           return results.ToArray();
       }
   }

   /// <summary>Downloads and returns the page after this one.</summary>
   public YahooSearch NextPage() {
       return new YahooSearch(this.query, this.page + 1);
   }

   /// <summary>Downloads and returns an arbitrary page of the same query.</summary>
   public YahooSearch GetPage(int page) {
       return new YahooSearch(this.query, page);
   }

   // Removes every fragment listed in Markup from a captured piece of HTML.
   private static string StripMarkup(string fragment) {
       foreach (string tag in Markup) {
           fragment = fragment.Replace(tag, "");
       }
       return fragment;
   }

}

/// <summary>One search hit: its URL, title and abstract text.</summary>
class YahooResult {

   /// <summary>URL stored for the hit.</summary>
   public string URL { get; set; }

   /// <summary>Title stored for the hit.</summary>
   public string Title { get; set; }

   /// <summary>Abstract / snippet text stored for the hit.</summary>
   public string Content { get; set; }

   /// <summary>Stores the three values unchanged in the matching properties.</summary>
   public YahooResult(string url, string title, string content) {
       URL = url;
       Title = title;
       Content = content;
   }

}

// Usage:

class Prog {

   // Demo entry point: searches for "test" and prints the title of every
   // hit on the first result page.
   static void Main() {
       var search = new YahooSearch("test");
       foreach (var hit in search.Results) {
           Console.WriteLine(hit.Title);
       }
   }

}</lang>

Python

<lang python>import urllib
import re

def fix(x):
    """Strip Yahoo's highlighting markup from a captured result fragment.

    Removes ``<b>...</b>``, ``<b>``, ``</b>``, ``<wbr />`` and ``<wbr>``,
    then cuts the text at the first ``</a></h3></div>`` if one is present.

    x -- fragment of result HTML captured by the search regex.
    Returns the cleaned string; input without markup is returned unchanged.
    """
    # The bold ellipsis has to go first: once the bare <b>/</b> tags are
    # removed it can no longer be matched.
    for tag in ("<b>...</b>", "<b>", "</b>", "<wbr />", "<wbr>"):
        x = x.replace(tag, "")
    end = x.find("</a></h3></div>")
    # find() returns -1 when the marker is absent; slicing with -1 would
    # silently drop the last character.
    if end == -1:
        return x
    return x[:end]

class YahooSearch:
    """One page of Yahoo! search results for a query.

    Constructing an instance downloads the page immediately. ``results``
    parses the downloaded HTML; ``nextpage`` downloads the following page.
    """

    def __init__(self, query, page=1):
        """Download result page `page` (1-based) for `query`.

        query -- search terms, inserted into the URL as given.
        page  -- page number; sent to Yahoo as offset (page - 1) * 10 + 1.
        """
        self.query = query
        self.page = page
        self.url = "http://search.yahoo.com/search?p=%s&b=%s" % (self.query, (self.page - 1) * 10 + 1)
        self.content = urllib.urlopen(self.url).read()

    def getresults(self):
        """Return a list of YahooResult objects parsed from the page.

        The page is re-parsed on every call. The list is kept in a local
        variable: assigning it to ``self.results`` would collide with the
        read-only ``results`` property below, which raises AttributeError
        on new-style classes.
        """
        found = []
        # Groups: 0 = title, 1 = abstract, 2 = displayed URL.
        pattern = ('<a class="yschttl spt" href=".+?" >(.+?)</a></h3></div>'
                   '<div class="abstr">(.+?)</div><span class=url>(.+?)</span>')
        for i in re.findall(pattern, self.content):
            title = fix(i[0])
            content = fix(i[1])
            url = fix(i[2])

            found.append(YahooResult(title, content, url))

        return found

    def getnextpage(self):
        """Download and return the search page after this one."""
        return YahooSearch(self.query, self.page + 1)

    results = property(fget=getresults)
    nextpage = property(fget=getnextpage)

class YahooResult:
    """A single search hit: title, abstract text and URL."""

    def __init__(self,title,content,url):
        """Store the three values unchanged as attributes of the same name."""
        self.title, self.content, self.url = title, content, url

# Usage:

x = YahooSearch("test")

for result in x.results:

   print result.title</lang>