Yahoo! search interface: Difference between revisions

From Rosetta Code
Content added Content deleted
(fixed enchanced results)
(fixed c# enchanced results)
Line 5: Line 5:


=={{header|C sharp|C#}}==
=={{header|C sharp|C#}}==

{{incorrect|C sharp|This example works, but "enhanced results" are not handled.}}


<lang csharp>using System;
<lang csharp>using System;
Line 30: Line 28:
this.page = page;
this.page = page;
this.content = new WebClient().DownloadString(String.Format("http://search.yahoo.com/search?p={0}&b={1}", query, ((this.page - 1) * 10) + 1));
this.content = new WebClient().DownloadString(String.Format("http://search.yahoo.com/search?p={0}&b={1}", query, ((this.page - 1) * 10) + 1));
}

public long Length {
get {
return long.Parse(new Regex("<span id=\"infotext\">.+? of (.+?) for").
Match(this.content).Groups[1].Value.Replace(",", ""));
}
}
}

string Fix(string x) {
x = x.Replace("<b>", "").Replace("</b>", "").Replace("<wbr />", "").Replace("<wbr>", "").Replace("<b>...</b>", "");
int i = x.IndexOf("</a></h3>");

if (i > 0) return x.Substring(0, i);
else return x;
}


public YahooResult[] Results {
public YahooResult[] Results {
Line 43: Line 42:
ArrayList results = new ArrayList();
ArrayList results = new ArrayList();


foreach (Match e in new Regex("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div><div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>").Matches(this.content)) {
foreach (Match e in new Regex("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>(?(<div class=\"sm-bd sm-nophoto\" id=\"sm-bd-4-1\">.+?</div>))<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>").Matches(this.content)) {
string rurl = e.Groups[3].Value.
string rurl = Fix(e.Groups[3].Value);
Replace("<b>", "").Replace("</b>", "").Replace("<wbr />", "").
string rtitle = Fix(e.Groups[1].Value);
Replace("<wbr>","");
string rcontent = Fix(e.Groups[2].Value);
string rtitle = e.Groups[1].Value.
Replace("<b>", "").Replace("</b>", "").Replace("<wbr />","");
string rcontent = e.Groups[2].Value.
Replace("<b>", "").Replace("</b>", "").Replace("<b>...</b>", "").
Replace("<wbr />","");

Console.WriteLine(rurl);
results.Add(new YahooResult(rurl, rtitle, rcontent));
results.Add(new YahooResult(rurl, rtitle, rcontent));
}
}

Revision as of 16:58, 5 May 2009

Task
Yahoo! search interface
You are encouraged to solve this task according to the task description, using any language you may know.

Create a class for searching Yahoo results. It must implement a Next Page method, and read URL, Title and Content from results.

C#

<lang csharp>using System; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic; using System.Linq;

/// <summary>
/// Downloads one page of Yahoo! search results for a query and extracts the
/// title, abstract and displayed URL of every hit by scraping the HTML.
/// </summary>
class YahooSearch {

   // Matches a single hit: group 1 = title, group 2 = abstract, group 3 = displayed URL.
   // The optional non-capturing group skips the extra "sm-bd" div that "enhanced"
   // results carry between the title and the abstract. A conditional "(?(...))" would
   // not do this: its expression is only tested as a zero-width assertion and never
   // consumes the div, so such hits would not match at all.
   private static readonly Regex ResultPattern = new Regex(
       "<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>" +
       "(?:<div class=\"sm-bd sm-nophoto\" id=\"sm-bd-4-1\">.+?</div>)?" +
       "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>");

   private string query;
   private string content;
   private int page = 1;

   /// <summary>Fetches the first result page for <paramref name="query"/>.</summary>
   /// <param name="query">Raw (unescaped) search terms.</param>
   public YahooSearch(string query) {
       this.query = query;
       this.content = Download("http://search.yahoo.com/search?p=" + Uri.EscapeDataString(query));
   }

   /// <summary>Fetches result page number <paramref name="page"/> for <paramref name="query"/>.</summary>
   /// <param name="query">Raw (unescaped) search terms.</param>
   /// <param name="page">1-based page number; it is converted to the "b" offset (1, 11, 21, ...).</param>
   public YahooSearch(string query, int page) {
       this.query = query;
       this.page = page;
       this.content = Download(String.Format("http://search.yahoo.com/search?p={0}&b={1}",
           Uri.EscapeDataString(query), ((this.page - 1) * 10) + 1));
   }

   // Downloads the page at the given URL and releases the WebClient afterwards.
   private static string Download(string url) {
       using (WebClient client = new WebClient()) {
           return client.DownloadString(url);
       }
   }

   // Removes the highlighting / word-break markup from a captured fragment and cuts
   // it at the closing title tags when they are present after the first character.
   private static string Fix(string x) {
       // The bolded ellipsis has to go first: once the bare <b> and </b> tags are
       // stripped it can no longer be found.
       x = x.Replace("<b>...</b>", "")
            .Replace("<b>", "").Replace("</b>", "")
            .Replace("<wbr />", "").Replace("<wbr>", "");

       int i = x.IndexOf("</a></h3>", StringComparison.Ordinal);

       if (i > 0) return x.Substring(0, i);
       else return x;
   }

   /// <summary>
   /// All hits found in the downloaded page, in document order.
   /// The array is empty when the markup does not match.
   /// </summary>
   public YahooResult[] Results {
       get {
           List<YahooResult> results = new List<YahooResult>();

           foreach (Match e in ResultPattern.Matches(this.content)) {
               string rurl = Fix(e.Groups[3].Value);
               string rtitle = Fix(e.Groups[1].Value);
               string rcontent = Fix(e.Groups[2].Value);

               results.Add(new YahooResult(rurl, rtitle, rcontent));
           }
           return results.ToArray();
       }
   }

   /// <summary>Runs the same query again for the page after this one.</summary>
   public YahooSearch NextPage() {
       return new YahooSearch(this.query, this.page + 1);
   }

   /// <summary>Runs the same query again for the given 1-based page number.</summary>
   public YahooSearch GetPage(int page) {
       return new YahooSearch(this.query, page);
   }

}

/// <summary>One search hit: its URL, title and abstract text.</summary>
class YahooResult {

   /// <summary>URL shown for the hit.</summary>
   public string URL { get; set; }

   /// <summary>Title of the hit.</summary>
   public string Title { get; set; }

   /// <summary>Abstract (snippet) text of the hit.</summary>
   public string Content { get; set; }

   /// <summary>Creates a hit from its three text fields.</summary>
   public YahooResult(string url, string title, string content) {
       URL = url;
       Title = title;
       Content = content;
   }

}

// Usage:

class Prog {

   // Entry point: searches for "test" and prints the title of every hit on the first page.
   static void Main() {
       YahooSearch search = new YahooSearch("test");
       YahooResult[] hits = search.Results;
       for (int i = 0; i < hits.Length; i++) {
           Console.WriteLine(hits[i].Title);
       }
   }

}</lang>

Python

<lang python>import urllib import re

def fix(x):
    """Strip highlighting markup from a captured result fragment.

    Removes the bold and word-break tags, then cuts the text at the closing
    title tags ("</a></h3>") if they occur. The text is returned whole when
    that marker is absent.
    """
    # The bolded ellipsis has to go first: once the bare <b> and </b> tags are
    # stripped it can no longer be found.
    for tag in ("<b>...</b>", "<b>", "</b>", "<wbr />", "<wbr>"):
        x = x.replace(tag, "")

    end = x.find("</a></h3>")
    if end < 0:
        # find() returns -1 when the marker is missing; slicing with it would
        # silently drop the last character.
        return x
    return x[:end]

class YahooSearch:
    """Downloads one page of Yahoo! search results and scrapes the hits from it."""

    def __init__(self, query, page=1):
        """Fetch result page `page` (1-based) for the raw, unescaped `query`."""
        self.query = query
        self.page = page
        # The query is escaped so that spaces, '&' and similar characters do not
        # corrupt the request; the "b" parameter is the offset 1, 11, 21, ...
        self.url = "http://search.yahoo.com/search?p=%s&b=%s" % (
            urllib.quote_plus(self.query), ((self.page - 1) * 10 + 1))
        self.content = urllib.urlopen(self.url).read()

    def getresults(self):
        """Return a list of YahooResult objects for every hit found in the page."""
        # A local list is used on purpose: "results" is defined as a property on
        # this class, so it must not also be assigned as an instance attribute.
        found = []

        # Groups: 1 = title, 2 = abstract, 3 = displayed URL.
        pattern = ("<a class=\"yschttl spt\" href=\".+?\" >(.+?)</a></h3></div>"
                   "<div class=\"abstr\">(.+?)</div><span class=url>(.+?)</span>")

        for i in re.findall(pattern, self.content):
            title = fix(i[0])
            content = fix(i[1])
            url = fix(i[2])

            found.append(YahooResult(title, content, url))

        return found

    def getnextpage(self):
        """Return a new search for the same query, one page further on."""
        return YahooSearch(self.query, self.page+1)

    results = property(fget=getresults)
    nextpage = property(fget=getnextpage)

class YahooResult:
    """One search hit: its title, abstract text and URL."""

    def __init__(self,title,content,url):
        # Store the three fields unchanged.
        self.title, self.content, self.url = title, content, url

# Usage:

x = YahooSearch("test")

for result in x.results:

   print result.title</lang>