Yahoo! search interface: Difference between revisions
Content deleted Content added
tcl = incorrect |
→{{header|Tcl}}: add example using OO and an html parser |
||
Line 261: | Line 261: | ||
puts [dict get [$it] title] |
puts [dict get [$it] title] |
||
after 300 ;# Slow the code down... :-) |
after 300 ;# Slow the code down... :-) |
||
}</lang> |
|||
Another approach: this version uses a class, as specified in the task, and an HTML parser instead of regular expressions (parsing HTML with regular expressions is a particular annoyance of mine). |
|||
{{works with|Tcl|8.6}} |
|||
<lang tcl>package require Tcl 8.6 |
|||
package require http |
|||
package require htmlparse |
|||
package require textutil::adjust |
|||
oo::class create yahoosearch {
    # Paged Yahoo! search client.
    #
    # Typical use: call "search" once with the query text, then call
    # "nextresult" repeatedly.  Each result is a dict with the keys "url" and
    # "title", plus "abstract" when the result markup contained one.
    #
    # Instance variables:
    #   searchterm   - the current query text
    #   page         - value sent as the "b" query parameter; starts at 1 and
    #                  is advanced by 10 for every additional page
    #   baseurl      - search endpoint
    #   results      - results parsed from the current page, not yet handed out
    #   state        - current state of the HTML scanning state machine
    #   current_data - the result dict currently being assembled

    # Start a new search for the text $s.
    # Returns the base URL (the value of the last assignment).
    method search {s} {
        my variable searchterm page baseurl results

        # Forget anything buffered by a previous search; otherwise nextresult
        # would keep handing out results that belong to the old query.
        unset -nocomplain results

        set searchterm $s
        set page 1
        set baseurl {http://search.yahoo.com/search}
    }

    # Fetch the current page and parse it into the "results" list.
    # Returns the list of parsed results (possibly empty).
    method getresults {} {
        my variable state results current_data

        set results [list]
        set current_data [dict create]
        set state looking_for_results

        htmlparse::parse -cmd [list [self] html_parser_callback] [my gethtml]

        # The callback only stores a result when the next one begins, so the
        # last one is still pending here.  Skip it when nothing was collected,
        # so that a page without hits does not yield an empty result dict.
        if {[dict size $current_data] > 0} {
            lappend results $current_data
        }
        return $results
    }

    # Advance to the next page of results and parse it.
    method nextpage {} {
        my variable page

        incr page 10
        my getresults
    }

    # Return the next result dict, fetching the first or the following page
    # when no buffered results remain.  Returns an empty string when the page
    # that was fetched contained no results.
    method nextresult {} {
        my variable results page

        if {![info exists results]} {
            my getresults
        } elseif {[llength $results] == 0} {
            my nextpage
        }

        # Pop the head of the list.
        set results [lassign $results result]
        return $result
    }

    # Download the HTML of the current result page and return it.
    method gethtml {} {
        my variable searchterm page baseurl

        set query [::http::formatQuery p $searchterm b $page]
        set url [format {%s?%s} $baseurl $query]

        set response [http::geturl $url]
        try {
            return [http::data $response]
        } finally {
            # Always release the token, even if reading the body fails.
            http::cleanup $response
        }
    }

    # Callback for htmlparse::parse, invoked once per tag.
    #
    #   tag               - tag name
    #   slash             - "/" for a closing tag
    #   param             - raw attribute text of the tag
    #   textBehindTheTag  - text between this tag and the next one
    #
    # Implements a state machine:
    #   looking_for_results -> ready -> getting_url -> getting_title
    #     -> looking_for_abstract -> (getting_abstract ->) ready
    method html_parser_callback {tag slash param textBehindTheTag} {
        my variable state results current_data

        switch -exact -- $state {
            looking_for_results {
                # Ignore everything before the main container.
                if {$tag eq "div" && [string first {id="main"} $param] != -1} {
                    set state ready
                }
            }
            ready {
                # A result container begins: store the previous result, if any.
                if {$tag eq "div" && [string first {class="res} $param] != -1} {
                    if {[dict size $current_data] > 0} {
                        lappend results $current_data
                    }
                    set current_data [dict create]
                    set state getting_url
                }
            }
            getting_url {
                # The title link carries the target URL and starts the title.
                if {$tag eq "a" && [string match "*yschttl spt*" $param]} {
                    if {[regexp {href="(.+?)"} $param - url]} {
                        dict set current_data url $url
                    } else {
                        dict set current_data url "no href in tag params: '$param'"
                    }
                    dict set current_data title $textBehindTheTag
                    set state getting_title
                }
            }
            getting_title {
                # Nested tags split the title; collect text up to the closing a tag.
                if {$tag eq "a" && $slash eq "/"} {
                    set state looking_for_abstract
                } else {
                    dict append current_data title $textBehindTheTag
                }
            }
            looking_for_abstract {
                # Reaching the url span first means this result has no abstract.
                if {$tag eq "span" && [string first {class="url} $param] != -1} {
                    set state ready
                } elseif {$tag eq "div" && [string first {class="abstr} $param] != -1} {
                    dict set current_data abstract $textBehindTheTag
                    set state getting_abstract
                }
            }
            getting_abstract {
                # Collect abstract text up to the closing div tag.
                if {$tag eq "div" && $slash eq "/"} {
                    set state ready
                } else {
                    dict append current_data abstract $textBehindTheTag
                }
            }
        }
    }
}
|||
# Demo: print the first 15 results for a query.
yahoosearch create searcher
searcher search "search text here"

for {set x 1} {$x <= 15} {incr x} {
    set result [searcher nextresult]

    # Stop when no usable result came back (for example, no more hits).
    if {![dict exists $result title] || ![dict exists $result url]} {
        break
    }

    puts [dict get $result title]
    puts [dict get $result url]

    # Not every result carries an abstract; print it only when present
    # instead of failing on, or reusing, an unset variable.
    if {[dict exists $result abstract]} {
        puts [textutil::adjust::indent [textutil::adjust::adjust [dict get $result abstract]] " "]
    }
    puts ""
}</lang>
}</lang> |