Web scraping: Difference between revisions

no edit summary
(→‎{{header|UNIX Shell}}: present a real unix shell solution (basically fixing the text around it))
No edit summary
Line 93:
return 0;
}</lang>
 
=={{header|C++}}==
 
This solution uses basic C-style Unix socket calls to fetch the page, plus C++ std::string operations to extract the time from the response.
<lang C++>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <cstring>
#include <iostream>
#include <string>
 
// Fetches the US Naval Observatory time page over plain HTTP (port 80) and
// prints the text that precedes the first "UTC" marker on its line, i.e. the
// current UTC time as reported by the server.
//
// Exit status:
//   0  the time was found and printed
//   1  the host name could not be resolved to an IPv4 address
//   2  no socket could be created
//   3  none of the host's addresses accepted a connection
//   4  the request could not be sent completely
//   5  the response did not contain a "<BR>...UTC" line
int main( int argc , char * argv[ ] ) {
   (void) argc ;   // command line is not used
   (void) argv ;

   const unsigned short int portnummer = 80 ;
   const char host[ ] = "tycho.usno.navy.mil" ;
   // HTTP/1.1 requires the Host header to name the server being contacted.
   // "Connection: close" makes the server end the stream after the reply, so
   // the read loop below terminates even when no "UTC" marker is present.
   const std::string request = std::string( "GET /cgi-bin/timer.pl HTTP/1.1\r\nHost: " )
      + host + "\r\nConnection: close\r\n\r\n" ;

   hostent * phe = gethostbyname( host ) ; //resolving the host
   // Only IPv4 results can be copied into sockaddr_in below, so anything
   // else is treated the same as a failed lookup.
   if ( phe == 0 || phe->h_addrtype != AF_INET ||
        static_cast<std::size_t>( phe->h_length ) != sizeof( in_addr ) ) {
      std::cout << "host name " << host << " could not be resolved!\n" ;
      return 1 ;
   }

   struct sockaddr_in address ;
   std::memset( &address , 0 , sizeof( address ) ) ;
   address.sin_family = AF_INET ;
   address.sin_port = htons( portnummer ) ;

   // Try each resolved address in turn. A socket whose connect() failed is in
   // an unspecified state, so every attempt gets a fresh socket and failed
   // ones are closed instead of being leaked.
   int socket_nummer = -1 ;
   for ( char **p = phe->h_addr_list ; *p != NULL ; ++p ) {
      socket_nummer = socket( AF_INET , SOCK_STREAM , 0 ) ;
      if ( socket_nummer == -1 ) {
         std::cout << "no socket possible\n" ;
         return 2 ;
      }
      // Copy exactly the 4 address bytes; the entry is a char buffer with no
      // alignment or size guarantee beyond h_length.
      std::memcpy( &address.sin_addr , *p , sizeof( address.sin_addr ) ) ;
      if ( connect( socket_nummer , reinterpret_cast<sockaddr*>( &address ) ,
                    sizeof( address ) ) == 0 ) {
         break ;
      }
      close( socket_nummer ) ;
      socket_nummer = -1 ;
   }
   if ( socket_nummer == -1 ) {
      std::cout << "no connection\n" ;
      return 3 ;
   }

   // write() may accept fewer bytes than requested, so loop until the whole
   // request (without any trailing NUL) has been handed to the kernel.
   std::string::size_type sent = 0 ;
   while ( sent < request.size( ) ) {
      const ssize_t written = write( socket_nummer , request.data( ) + sent ,
                                     request.size( ) - sent ) ;
      if ( written <= 0 ) {
         std::cout << "request could not be sent\n" ;
         close( socket_nummer ) ;
         return 4 ;
      }
      sent += static_cast<std::string::size_type>( written ) ;
   }

   // A TCP stream delivers the reply in arbitrary pieces: keep reading until
   // the marker shows up, the peer closes, an error occurs, or the size cap
   // is reached (the cap bounds memory if the server misbehaves).
   const std::string::size_type max_response = 1024 * 1024 ;
   const std::string marker( "UTC" ) ;
   char characters_received[ 4096 ] ;
   std::string text ;
   std::string::size_type found = std::string::npos ; //find UTC time string
   while ( found == std::string::npos && text.size( ) < max_response ) {
      const ssize_t number = read( socket_nummer , characters_received ,
                                   sizeof( characters_received ) ) ;
      if ( number <= 0 ) {
         break ;
      }
      // The marker may straddle two reads, so resume the search slightly
      // before the newly appended data.
      const std::string::size_type resume =
         text.size( ) < marker.size( ) ? 0 : text.size( ) - ( marker.size( ) - 1 ) ;
      text.append( characters_received , static_cast<std::string::size_type>( number ) ) ;
      found = text.find( marker , resume ) ;
   }
   close( socket_nummer ) ;

   // The time sits between the last "<BR>" before the marker and the marker.
   const std::string::size_type start =
      ( found == std::string::npos ) ? std::string::npos : text.rfind( "<BR>" , found ) ;
   if ( start == std::string::npos ) {
      std::cout << "UTC time not found in response\n" ;
      return 5 ;
   }
   std::cout << "UTC time is " << text.substr( start + 4 , found -
         ( start + 4 ) ) << "!\n" ;
   return 0 ;
}
</lang>
 
=={{header|Common Lisp}}==
262

edits