#!/usr/bin/perl

use strict;
use warnings;

use WWW::Mechanize;

# Query Google Scholar's advanced search form for papers by a given author
# whose title contains an exact phrase, walk through every result page, and
# print the title and citation count of each hit that has a "Cited by" link.
#
# Usage: <script> AUTHOR [TITLE]

my $URL = 'http://scholar.google.com/advanced_scholar_search';
my $FORM_NAME = 'f';

# Fail early with a usage hint instead of pushing undefined values into the
# search form.
die "Usage: $0 AUTHOR [TITLE]\n" unless @ARGV >= 1 && @ARGV <= 2;
my ($AUTHOR, $TITLE) = @ARGV;
$TITLE = '' unless defined $TITLE;

# autocheck is switched off so that failures are reported through return
# values. With autocheck enabled (the default in current WWW::Mechanize
# releases) follow_link() throws when no "Next" link exists, which would end
# the pagination loop below with an exception on the last result page.
my $mech = WWW::Mechanize->new(stack_depth => 10, autocheck => 0);

# get() hands back an HTTP::Response object, which is always true, so the
# response status has to be tested explicitly.
my $start = $mech->get($URL);
die ("Could not connect to $URL.\n") unless $start && $start->is_success();

my $res = $mech->submit_form(
	form_name => $FORM_NAME,
	fields => {
		'num' => 100,
		'as_epq' => $TITLE,
		'as_occt' => 'title',
		'as_sauthors' => $AUTHOR,
		'as_allsubj' => 'all',
	},
);

# One iteration per result page; stops on a failed request or when the
# current page has no "Next" link (follow_link() then returns undef).
while ($res && $res->is_success()) {
	# charset => 'none' undoes any Content-Encoding (e.g. gzip) but leaves
	# the bytes in the page's own character set, so the printed output is
	# the same as with the raw content of an uncompressed response.
	my $content = $res->decoded_content(charset => 'none');
	$content = $res->content unless defined $content;

	# Each search hit is a '<p class=g>' paragraph.
	while ($content =~ /<p class=g>(.*?)<\/font>\s\s\s/gs) {
		my $section = $1;

		# get title, stripped of markup and with ellipsis entities expanded
		my $title = getTitle($section);
		$title =~ s/<.*?>//g;
		$title =~ s/&hellip;/.../g;

		# get citedby #; hits without a citation count are not reported
		my $citedby = getCitedBy($section);
		if ($citedby) {
			print "\"$title\"\nCited by: $citedby\n\n";
		}
	}

	$res = $mech->follow_link(text_regex => qr/Next/i);
}

#############################################################################

# getTitle($section)
#
# Extracts the title markup from the HTML of a single search result.
#
#   $section - HTML fragment of one result entry.
#
# Returns the captured title text exactly as it appears in the fragment
# (nested tags and entities are left in place for the caller to clean up).
# Dies with an excerpt of the fragment when neither known layout matches.
#
# No prototype is declared: the sub is called before its definition, where a
# prototype would not be applied and would only produce a "called too early
# to check prototype" warning.
sub getTitle {
	my ($section) = @_;

	# papers with a link: the title is the anchor text inside the "w" span
	if ($section =~ /<span class="w">.*?<a href.*?>(.*?)<\/a><\/span>/s) {
		return $1;
	}

	# papers w/o a link: the title sits between '&nbsp;' and the next
	# small-font block
	if ($section =~ /&nbsp;(.*?)<font size=-1>/s) {
		return $1;
	}

	die ("Could not scrape title! Here's a code excerpt:\n$section\n");
}

#----------------------------------------------------------------------------
# getCitedBy($section)
#
# Looks for a "Cited by N" link text in the HTML of a single search result.
#
#   $section - HTML fragment of one result entry.
#
# Returns the citation count N as captured from the fragment, or undef when
# the fragment contains no such text.
#
# No prototype is declared: the sub is called before its definition, where a
# prototype would not be applied and would only produce a "called too early
# to check prototype" warning.
sub getCitedBy {
	my ($section) = @_;

	# The surrounding '>' and '<' tie the match to the text of an element.
	return ($section =~ />Cited by (\d+)</s) ? $1 : undef;
}

#----------------------------------------------------------------------------