Yahoo! search interface: Difference between revisions

Added Wren
(Added Wren)
Line 2,988:
</pre>
 
=={{header|Wren}}==
{{libheader|libcurl}}
{{libheader|Wren-pattern}}
An embedded program so we can use libcurl.
 
A somewhat ''ad hoc'' solution as the output format for Yahoo! search seems to change quite frequently. All I can say is that this worked on 5th January, 2022.
<lang ecmascript>/* yahoo_search_interface.wren */
 
import "./pattern" for Pattern
 
class YahooSearch {
construct new(url, title, desc) {
_url = url
_title = title
_desc = desc
}
 
toString { "URL: %(_url)\nTitle: %(_title)\nDescription: %(_desc)\n" }
}
 
var CURLOPT_URL = 10002
var CURLOPT_FOLLOWLOCATION = 52
var CURLOPT_WRITEFUNCTION = 20011
var CURLOPT_WRITEDATA = 10001
 
foreign class Buffer {
construct new() {} // C will allocate buffer of a suitable size
 
foreign value // returns buffer contents as a string
}
 
foreign class Curl {
construct easyInit() {}
 
foreign easySetOpt(opt, param)
 
foreign easyPerform()
 
foreign easyCleanup()
}
 
var curl = Curl.easyInit()
 
var getContent = Fn.new { |url|
var buffer = Buffer.new()
curl.easySetOpt(CURLOPT_URL, url)
curl.easySetOpt(CURLOPT_FOLLOWLOCATION, 1)
curl.easySetOpt(CURLOPT_WRITEFUNCTION, 0) // write function to be supplied by C
curl.easySetOpt(CURLOPT_WRITEDATA, buffer)
curl.easyPerform()
return buffer.value
}
 
var p1 = Pattern.new("class/=\" d-ib ls-05 fz-20 lh-26 td-hu tc va-bot mxw-100p\" href/=\"[+1^\"]\"")
var p2 = Pattern.new("class/=\" d-ib p-abs t-0 l-0 fz-14 lh-20 fc-obsidian wr-bw ls-n pb-4\">[+1^<]<")
var p3 = Pattern.new("<span class/=\" fc-falcon\">[+1^<]<")
 
var pageSize = 7
var totalCount = 0
 
var yahooSearch = Fn.new { |query, page|
System.print("Page %(page):\n=======\n")
var next = (page - 1) * pageSize + 1
var url = "https://search.yahoo.com/search?fr=opensearch&pz=%(pageSize)&p=%(query)&b=%(next)"
var content = getContent.call(url).replace("<b>", "").replace("</b>", "")
var matches1 = p1.findAll(content)
var count = matches1.count
if (count == 0) return false
var matches2 = p2.findAll(content)
var matches3 = p3.findAll(content)
totalCount = totalCount + count
var ys = List.filled(count, null)
for (i in 0...count) {
var url = matches1[i].capsText[0]
var title = matches2[i].capsText[0]
var desc = matches3[i].capsText[0].replace("&#39;", "'")
ys[i] = YahooSearch.new(url, title, desc)
}
System.print(ys.join("\n"))
return true
}
 
var page = 1
var limit = 2
var query = "rosettacode"
System.print("Searching for '%(query)' on Yahoo!\n")
while (page <= limit && yahooSearch.call(query, page)) {
page = page + 1
System.print()
}
System.print("Displayed %(limit) pages with a total of %(totalCount) entries.")
curl.easyCleanup()</lang>
<br>
We now embed this script in the following C program, build and run.
<lang c>/* gcc yahoo_search_interface.c -o yahoo_search_interface -lcurl -lwren -lm */
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
#include "wren.h"
 
struct MemoryStruct {
char *memory;
size_t size;
};
 
/* C <=> Wren interface functions */
 
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
size_t realsize = size * nmemb;
struct MemoryStruct *mem = (struct MemoryStruct *)userp;
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
if(!ptr) {
/* out of memory! */
printf("not enough memory (realloc returned NULL)\n");
return 0;
}
 
mem->memory = ptr;
memcpy(&(mem->memory[mem->size]), contents, realsize);
mem->size += realsize;
mem->memory[mem->size] = 0;
return realsize;
}
 
void C_bufferAllocate(WrenVM* vm) {
struct MemoryStruct *ms = (struct MemoryStruct *)wrenSetSlotNewForeign(vm, 0, 0, sizeof(struct MemoryStruct));
ms->memory = malloc(1);
ms->size = 0;
}
 
void C_bufferFinalize(void* data) {
struct MemoryStruct *ms = (struct MemoryStruct *)data;
free(ms->memory);
}
 
void C_curlAllocate(WrenVM* vm) {
CURL** pcurl = (CURL**)wrenSetSlotNewForeign(vm, 0, 0, sizeof(CURL*));
*pcurl = curl_easy_init();
}
 
void C_value(WrenVM* vm) {
struct MemoryStruct *ms = (struct MemoryStruct *)wrenGetSlotForeign(vm, 0);
wrenSetSlotString(vm, 0, ms->memory);
}
 
void C_easyPerform(WrenVM* vm) {
CURL* curl = *(CURL**)wrenGetSlotForeign(vm, 0);
curl_easy_perform(curl);
}
 
void C_easyCleanup(WrenVM* vm) {
CURL* curl = *(CURL**)wrenGetSlotForeign(vm, 0);
curl_easy_cleanup(curl);
}
 
void C_easySetOpt(WrenVM* vm) {
CURL* curl = *(CURL**)wrenGetSlotForeign(vm, 0);
CURLoption opt = (CURLoption)wrenGetSlotDouble(vm, 1);
if (opt < 10000) {
long lparam = (long)wrenGetSlotDouble(vm, 2);
curl_easy_setopt(curl, opt, lparam);
} else if (opt < 20000) {
if (opt == CURLOPT_WRITEDATA) {
struct MemoryStruct *ms = (struct MemoryStruct *)wrenGetSlotForeign(vm, 2);
curl_easy_setopt(curl, opt, (void *)ms);
} else if (opt == CURLOPT_URL) {
const char *url = wrenGetSlotString(vm, 2);
curl_easy_setopt(curl, opt, url);
}
} else if (opt < 30000) {
if (opt == CURLOPT_WRITEFUNCTION) {
curl_easy_setopt(curl, opt, &WriteMemoryCallback);
}
}
}
 
WrenForeignClassMethods bindForeignClass(WrenVM* vm, const char* module, const char* className) {
WrenForeignClassMethods methods;
methods.allocate = NULL;
methods.finalize = NULL;
if (strcmp(module, "main") == 0) {
if (strcmp(className, "Buffer") == 0) {
methods.allocate = C_bufferAllocate;
methods.finalize = C_bufferFinalize;
} else if (strcmp(className, "Curl") == 0) {
methods.allocate = C_curlAllocate;
}
}
return methods;
}
 
WrenForeignMethodFn bindForeignMethod(
WrenVM* vm,
const char* module,
const char* className,
bool isStatic,
const char* signature) {
if (strcmp(module, "main") == 0) {
if (strcmp(className, "Buffer") == 0) {
if (!isStatic && strcmp(signature, "value") == 0) return C_value;
} else if (strcmp(className, "Curl") == 0) {
if (!isStatic && strcmp(signature, "easySetOpt(_,_)") == 0) return C_easySetOpt;
if (!isStatic && strcmp(signature, "easyPerform()") == 0) return C_easyPerform;
if (!isStatic && strcmp(signature, "easyCleanup()") == 0) return C_easyCleanup;
}
}
return NULL;
}
 
static void writeFn(WrenVM* vm, const char* text) {
printf("%s", text);
}
 
void errorFn(WrenVM* vm, WrenErrorType errorType, const char* module, const int line, const char* msg) {
switch (errorType) {
case WREN_ERROR_COMPILE:
printf("[%s line %d] [Error] %s\n", module, line, msg);
break;
case WREN_ERROR_STACK_TRACE:
printf("[%s line %d] in %s\n", module, line, msg);
break;
case WREN_ERROR_RUNTIME:
printf("[Runtime Error] %s\n", msg);
break;
}
}
 
char *readFile(const char *fileName) {
FILE *f = fopen(fileName, "r");
fseek(f, 0, SEEK_END);
long fsize = ftell(f);
rewind(f);
char *script = malloc(fsize + 1);
fread(script, 1, fsize, f);
fclose(f);
script[fsize] = 0;
return script;
}
 
static void loadModuleComplete(WrenVM* vm, const char* module, WrenLoadModuleResult result) {
if( result.source) free((void*)result.source);
}
 
WrenLoadModuleResult loadModule(WrenVM* vm, const char* name) {
WrenLoadModuleResult result = {0};
if (strcmp(name, "random") != 0 && strcmp(name, "meta") != 0) {
result.onComplete = loadModuleComplete;
char fullName[strlen(name) + 6];
strcpy(fullName, name);
strcat(fullName, ".wren");
result.source = readFile(fullName);
}
return result;
}
 
int main(int argc, char **argv) {
WrenConfiguration config;
wrenInitConfiguration(&config);
config.writeFn = &writeFn;
config.errorFn = &errorFn;
config.bindForeignClassFn = &bindForeignClass;
config.bindForeignMethodFn = &bindForeignMethod;
config.loadModuleFn = &loadModule;
WrenVM* vm = wrenNewVM(&config);
const char* module = "main";
const char* fileName = "yahoo_search_interface.wren";
char *script = readFile(fileName);
WrenInterpretResult result = wrenInterpret(vm, module, script);
switch (result) {
case WREN_RESULT_COMPILE_ERROR:
printf("Compile Error!\n");
break;
case WREN_RESULT_RUNTIME_ERROR:
printf("Runtime Error!\n");
break;
case WREN_RESULT_SUCCESS:
break;
}
wrenFreeVM(vm);
free(script);
return 0;
}</lang>
 
{{out}}
Limited to first two pages.
<pre>
Searching for 'rosettacode' on Yahoo!
 
Page 1:
=======
 
URL: https://rosettacode.org/wiki/Rosetta_Code
Title: rosettacode.org
Description: Enjoy amazing savings with our 50% off
 
URL: https://en.wikipedia.org/wiki/Rosetta_Code
Title: en.wikipedia.org
Description: Rosetta Code is a programming chrestomathy site. The idea is to present solutions to the same task in as many different languages as possible, to demonstrate how languages are similar and different, and to aid a person with a grounding in one approach to a problem in learning another.
 
URL: https://raku.org/community/rosettacode/
Title: raku.org
Description: Rosetta Code is a wiki-based programming website with implementations of common algorithms and solutions to various programming problems in many different programming languages. It is named for the Rosetta Stone , which has the same text inscribed on it in three languages, and thus allowed Egyptian hieroglyphs to be deciphered for the first time.
 
URL: https://metacpan.org/dist/RosettaCode/view/bin/rosettacode
Title: metacpan.org
Description: Raku on Rosetta Code. Rosetta Code is a community site that presents solutions to programming tasks in many different languages. It is an excellent place to see how various languages approach the same task, as well as to get a feel for the style and fluent use of each language across a variety of domains.
 
URL: https://simple.wikipedia.org/wiki/Rosetta_Code
Title: simple.wikipedia.org
Description: To install RosettaCode, copy and paste the appropriate command in to your terminal. cpanm. cpanm RosettaCode. CPAN shell. perl -MCPAN -e shell install RosettaCode
 
 
Page 2:
=======
 
URL: https://improbotics.org/rosetta-code/
Title: improbotics.org
Description: Enjoy amazing savings with our 50% off
 
URL: https://en.wikipedia.org/wiki/Rosetta_Code
Title: en.wikipedia.org
Description: Rosetta Code was scheduled to perform at Brighton Fringe 2020 at The Warren: Theatre Box and at Camden Fringe 2020 at The Cockpit Theatre and will return live in 2021. Our show was the subject of a scientific research paper “Rosetta Code: Improv in Any Language“, published at the International Conference on Computational Creativity 2020.
 
URL: https://www.freecodecamp.org/news/rosetta-code-unlocking-the-mysteries-of-the-programming-languages-that-power-our-world-300b787d8401/
Title: www.freecodecamp.org
Description: Rosetta Code is a wiki-based programming website with implementations of common algorithms and solutions to various programming problems in many different programming languages. It is named for the Rosetta Stone , which has the same text inscribed on it in three languages, and thus allowed Egyptian hieroglyphs to be deciphered for the first time.
 
URL: https://raku.org/community/rosettacode/
Title: raku.org
Description: Rosetta Code — unlocking the mysteries of the programming languages that power our world. Peter Gleeson. The original Rosetta Stone, via History.com. It’s no secret that the tech world is dominated by a relatively small pool of programming languages. While exact figures are difficult to obtain (and no doubt vary over time), you could ...
 
URL: https://rosettacode.org/wiki/SHA-256
Title: rosettacode.org
Description: Raku on Rosetta Code. Rosetta Code is a community site that presents solutions to programming tasks in many different languages. It is an excellent place to see how various languages approach the same task, as well as to get a feel for the style and fluent use of each language across a variety of domains.
 
URL: https://twitter.com/rosettacode
Title: twitter.com
Description: SHA-256 is the recommended stronger alternative to SHA-1.See FIPS PUB 180-4 for implementation details.. Either by using a dedicated library or implementing the ...
 
URL: https://deals.usnews.com/coupons/rosetta-stone
Title: deals.usnews.com
Description: We would like to show you a description here but the site won’t allow us.
 
 
Displayed 2 pages with a total of 12 entries.
</pre>
 
{{omit from|Batch File|Does not have network access.}}
9,476

edits