Googling


If there had been extra time, I was going to talk about googling, my current awareness program. I think it is a simple but effective use of SOAP. If you want to run it, download it.



     1	#!/usr/bin/perl
     2	
     3	=head1 NAME
     4	
     5	    googling - current awareness via email
     6	
     7	=head1 SYNOPSIS
     8	
     9	    googling --query='funkadelic' --email='george@clinton.com' \
    10		--key='IXJGrPF_THIS_WONT_WORK_1bB8+FCO'
    11	
    12	=head1 DESCRIPTION
    13	
    14	googling will perform a search of Google and send results to an email 
    15	or to STDOUT if no email is given. 
    16	
    17	=head1 OPTIONS
    18	
    19	=head2 --query
    20	
    21	You must pass this in since it's the query you want to run against google.
    22	
    23	=head2 --email
    24	
    25	If you'd like to have results sent via email use this option. You can use it
    26	more than once if you'd like the report to go to more than one address.
    27	If not supplied results will go to STDOUT. It's a useful option if you want
    28	(as I did) to run the query from cron.
    29	
    30	=head2 --db
    31	
    32	Specify the sqlite database to use. If not supplied it defaults to 
    33	googling.db in the pwd.
    34	
    35	=head2 --key
    36	
    37	Specify your Google API Key. See http://www.google.com/apis/ to get your 
    38	own.
    39	
    40	=head2 --limit
    41	
    42	If you'd like to stop checking google after a certain amount of hits use
    43	this option. Default is 100.
    44	
    45	=head2 --debug
    46	
    47	See diagnostic information.
    48	
    49	=head2 --help
    50	
    51	See this message.
    52	
    53	=head1 SEE ALSO
    54	
    55	=head1 AUTHOR
    56	
    57	=over 4
    58	
    59	=item Ed Summers E<lt>ehs@pobox.comE<gt>
    60	
    61	=back
    62	
    63	=cut
    64	
    65	use strict;
    66	use warnings;
    67	use Getopt::Long;
    68	use Pod::Usage;
    69	use SOAP::Lite;
    70	use DBI;
    71	use Mail::Send;
    72	
    73	## gather options
    74	my ( $limit, $query, $db, $debug, $help, $googleId, @emails );
    75	GetOptions( 
    76	    'limit:i'	=> \$limit,
    77	    'query:s'	=> \$query,
    78	    'email:s'	=> \@emails,
    79	    'key:s'	=> \$googleId,
    80	    'db:s'	=> \$db,
    81	    'debug!'	=> \$debug,
    82	    'help!'	=> \$help,
    83	);
    84	
    85	if ( !$query or !$googleId or $help ) { pod2usage( 1 ); }
    86	
    87	$limit = 100 if !$limit;
    88	$db = 'googling.db' if ! $db;
    89	
    90	
    91	## database access to remember what results have been seen 
    92	## in previous runs
    93	my $dbh = getDbh( $db );
    94	my $select = $dbh->prepare( qq( 
    95	    select count(*) from seen where url = ? and query = ?
    96	) );
    97	my $insert = $dbh->prepare( qq( 
    98	    insert into seen (url,query) values(?,?) 
    99	) );
   100	
   101	
   102	## build the variables that we are going to pass to our SOAP request
   103	
   104	SOAP::Data->import('name');
   105	
   106	
   107	my $key = name('key')->value( $googleId );
   108	my $q = name('q',$query)->type('string');
   109	my $maxResults = name('maxResults',10)->type('int');
   110	my $filter = name('filter',1)->type('boolean');
   111	my $restrict = name('restrict','')->type('string');
   112	my $safeSearch = name('safeSearch',0)->type('boolean');
   113	my $langRestrict = name('lr','')->type('string');
   114	my $inputEncoding = name('ie','')->type('string');
   115	my $outputEncoding = name('oe','')->type('string');
   116	
   117	## storage variables 
   118	my @results = ();
   119	my $more = 1;
   120	my $startElement = 0;
   121	my $results;
   122	
   123	## while we haven't exceeded the citation limit 
   124	while ( $startElement < $limit ) {
   125	
   126	    ## create start element based on the last result record we read in
   127	    ## google limits query results to a set of 10 at a time
   128	    my $start = name('start',$startElement)->type('int');
   129	
   130	    ## issue the query
   131	    my $response = SOAP::Lite
   132		-> uri( 'urn:GoogleSearch' )
   133		-> proxy( 'http://api.google.com/search/beta2' )
   134		-> doGoogleSearch( $key, $q, $start, $maxResults, $filter, 
   135		    $restrict, $safeSearch, $langRestrict, $inputEncoding, 
   136		    $outputEncoding );
   137	
   138	    ## if we didn't get any more results we're done
   139	    last if ( $startElement == $response->result->{ endIndex } );
   140	
   141	    ## go through each item in the result set
   142	    foreach my $item ( @{ $response->result()->{ resultElements } } ) {
   143	
   144		## look in the db to see if we've seen this url for this query before	
   145		$select->execute( $item->{ URL }, $query );
   146		my ( $result ) = $select->fetchrow_array();
   147	
   148		## if it's a new url add it to the database (so we don't
   149		## report it again), and add it to the results
   150		if ( ! $result ) {
   151		    $insert->execute( $item->{ URL }, $query );
   152		    if ( !@emails ) { 
   153			print $item->{ URL }, "\n", $item->{ snippet }, "\n", "\n";
   154		    } else {
   155			push( @results, [ $item->{ URL }, $item->{ snippet } ] );
   156		    }
   157		}
   158	
   159	    }
   160	
   161	    ## set the next start element to the index of the last item in this
   162	    ## result set
   163	    $startElement = $response->result->{ endIndex };
   164	
   165	}
   166	
   167	## if new results were found, and we have been asked to send an email
   168	if ( @results and @emails ) { 
   169	    my $msg = Mail::Send->new();
   170	    $msg->to( join( ',', @emails ) );
   171	    $msg->subject( "googling: $query" );
   172	    my $fh = $msg->open( 'qmail' );
   173	    my $count = 1;
   174	    foreach ( @results ) { 
   175		$fh->print( "[$count] ", $_->[0], "\n" );
   176		if ( $_->[1] ) { $fh->print( $_->[1], "\n" ); }
   177		$fh->print( "\n" );
   178		$count++;
   179	    }
   180	    $fh->close();
   181	}
   182	
   183	## disconnect from db
   184	$select->finish();
   185	$insert->finish();
   186	$dbh->disconnect();
   187	
   188	
   189	## done
   190	
   191	
   192	sub getDbh {
   193	    my $db = shift;
   194	    ## if the database file exists attempt to connect to it
   195	    if ( -f $db ) { 
   196		return( DBI->connect( "dbi:SQLite:$db" ) );
   197	    }
   198	    ## otherwise create the datbase, and table
   199	    my $dbh = DBI->connect( "dbi:SQLite:$db" );
   200	    $dbh->do( 'create table seen (url varchar(500), query varchar(500))' );
   201	    return( $dbh );
   202	}
   203	
   204