#!/usr/bin/perl
#
# Repeatedly queries the Amazon "search-inside" reader URL for one fixed
# book id, prints the excerpt found in each result page, and then searches
# again using the tail of that excerpt as the next keyword string.
# Presumably the intent is to chain overlapping excerpts together; the
# script itself only prints each excerpt between "===" markers.
#
# The loop ends when a page has no excerpt cell, when the excerpt is
# shorter than 30 characters, or when a query is about to be repeated.
# Any unsuccessful HTTP response is fatal.

use strict;
use warnings;

use URI::Escape;
use LWP 5.64;
use HTML::FormatText;
use HTML::Parse;

my $BASE_URL = 'http://www.amazon.com/gp/reader/0380977427/?v=search-inside&keywords=';

# Seed phrase for the first search; later iterations replace it with the
# tail of the most recently printed excerpt.
my $string = "enoch rounds the corner";

my $browser = LWP::UserAgent->new;

# Extra request headers sent with every GET.
my @ns_headers = (
    'User-Agent' => 'Mozilla/4.76 [en] (Win98; U)',
);

# Queries already issued.  Without this guard, a result whose tail maps
# back to an earlier query would make the loop request the same pages
# forever.
my %seen_query;

while ($string) {
    if ( $seen_query{$string}++ ) {
        warn "Stopping: search string repeated\n";
        last;
    }

    my $url      = $BASE_URL . uri_escape($string);
    my $response = $browser->get( $url, @ns_headers );
    die "Can't get $url -- ", $response->status_line
        unless $response->is_success;

    my $content = $response->content;

    # The excerpt is the text between the leading and trailing "..." inside
    # the first <td class="small"> cell; no such cell means we are done.
    last unless $content =~ /<td class="small">"\.\.\.\s(.*?)\s\.\.\."/si;
    my $excerpt_html = $1;

    # Render the HTML fragment as plain text, then release the parse tree
    # (HTML::Parse's documentation calls ->delete when done with a tree).
    my $tree = parse_html($excerpt_html);
    my $text = HTML::FormatText->new->format($tree);
    $tree->delete;

    print "\n===$text===\n";

    # Next query = the last 30 characters of the excerpt.  An excerpt
    # shorter than that cannot seed another search, so stop.
    last unless $text =~ /(.*?)(.{30})$/s;
    my ( $first_part, $tail ) = ( $1, $2 );

    # If the text just before the 30-character tail ends in letters that
    # follow whitespace, prepend them so the query does not start with the
    # second half of a split word.
    if ( $first_part =~ /\s([a-zA-Z]+?)$/ ) {
        $tail = $1 . $tail;
    }

    $string = $tail;
}