#!/usr/bin/perl
#
# Query the Google Scholar advanced-search form for papers matching an
# author and (optionally) an exact title phrase, then print the title and
# "Cited by" count of every result that carries a citation count.  All
# result pages are visited by following the "Next" link until none is left.
#
# Usage: <script> AUTHOR [TITLE]
#
# NOTE(review): the scraping patterns below depend on the exact HTML markup
# Google Scholar served when this was written -- confirm they still match.

use strict;
use warnings;

use WWW::Mechanize;

my $URL       = 'http://scholar.google.com/advanced_scholar_search';
my $FORM_NAME = 'f';

die("Usage: $0 AUTHOR [TITLE]\n") unless @ARGV;

my $AUTHOR = $ARGV[0];
# A missing title means "no title restriction" rather than an undef field.
my $TITLE = defined $ARGV[1] ? $ARGV[1] : '';

my $mech = WWW::Mechanize->new(stack_depth => 10);

# get() returns a response object (always true) and, with autocheck enabled,
# dies on its own; trap that so the failure is reported with our message.
my $start_page = eval { $mech->get($URL) };
die("Could not connect to $URL.\n")
    unless $start_page && $start_page->is_success();

# Field names are those of the search form; presumably 'num' is the page
# size and 'as_occt' restricts the phrase match to titles -- TODO confirm.
my $res = $mech->submit_form(
    form_name => $FORM_NAME,
    fields    => {
        'num'         => 100,
        'as_epq'      => $TITLE,
        'as_occt'     => 'title',
        'as_sauthors' => $AUTHOR,
        'as_allsubj'  => 'all',
    },
);

while ($res && $res->is_success()) {
    my $content = $res->content;

    # Each search hit is captured as one "section" of the page.
    while ($content =~ /<p class=g>(.*?)<\/font>\s\s\s/gs) {
        my $section = $1;

        # get title, stripped of markup
        my $title = getTitle($section);
        $title =~ s/<.*?>//g;
        $title =~ s/…/\.\.\./g;

        # get citedby; hits without a citation count are not reported
        my $citedby = getCitedBy($section);
        if ($citedby) {
            print "\"$title\"\nCited by: $citedby\n\n";
        }
    }

    # follow_link() dies under autocheck when the link is absent, which is
    # the normal situation on the last page -- look the link up first.
    my $next_link = $mech->find_link(text_regex => qr/Next/i);
    last unless $next_link;
    $res = $mech->get($next_link->url());
}

#############################################################################
# getTitle($section)
#   Extract the (still HTML-decorated) title from one search-hit section.
#   Returns the captured title text.
#   Dies with an excerpt of the section when neither pattern matches.
sub getTitle {
    my ($section) = @_;

    # papers with a link
    if ($section =~ /<span class="w">.*?<a href.*?>(.*?)<\/a><\/span>/s) {
        return $1;
    }

    # papers w/o a link
    if ($section =~ / (.*?)<font size=-1>/s) {
        return $1;
    }

    die("Could not scrape title! Here's a code excerpt:\n$section\n");
}

#----------------------------------------------------------------------------
# getCitedBy($section)
#   Extract the number following ">Cited by " from one search-hit section.
#   Returns the count, or undef when the section has no such text.
sub getCitedBy {
    my ($section) = @_;

    my $citedby;
    if ($section =~ />Cited by (\d+)</s) {
        $citedby = $1;
    }
    return $citedby;
}

#----------------------------------------------------------------------------