#!/usr/bin/perl

use warnings;
use strict;           # Don't allow sloppy syntax
use CGI;
use LWP::Simple;
use XML::RSS;
use CHI;

# When true, category and page are read from @ARGV instead of the CGI
# request, and progress/debug output is printed while scraping.
my $DEBUG = 0;

# Site root, and the relative path of the listing endpoint to which the
# category id gets appended.
my $BASEURL = 'http://tv.repubblica.it/';
my $LISTURL = 'php/crontab/ssi_list_scelti_per_voi.php?cat_id=';

# Italian month name => month number (1-12), used by dateConvert.
my %months = do {
	my $number = 0;
	map { $_ => ++$number } qw(
		gennaio febbraio marzo aprile maggio giugno
		luglio agosto settembre ottobre novembre dicembre
	);
};

# File-backed cache holding the rendered feeds.
my $cache = CHI->new(
	driver   => 'File',
	root_dir => '/home/mala/reprssCache',
);

# What did the user ask for?
my $params = CGI->new;

# note: categories are
#   Copertina          15
#   Politica           36
#   Cronaca            17
#   Mondo              18
#   Sport              19
#   Spett&Cultura      20
#   Tecno&Scienze      21
#   Politica/Dossier   16
my $category = $params->param('cat')  || 15;
my $pagenum  = $params->param('page') || 1;

# In debug mode both values come from the command line instead.
if ($DEBUG) {
	$category = $ARGV[0] || 15;
	$pagenum  = $ARGV[1] || 1;
}

# Both values are interpolated verbatim into the upstream URL, which is also
# used as the cache key. Anything that is not a plain integer is replaced by
# its default instead of being forwarded.
$category = 15 unless $category =~ /\A[0-9]+\z/;
$pagenum  = 1  unless $pagenum  =~ /\A[0-9]+\z/;

print "Content-type: text/xml\n\n";

my $URL = $BASEURL.$LISTURL.$category."&page=$pagenum";
my $rss = $cache->get($URL);
if (defined $rss) {
	# if the RSS feed has already been cached just print it
	print $rss;
	exit;
}

# Start an empty RSS 2.0 document and register the extra namespaces
# (Media RSS and Dublin Core) used by the channel and its items.
$rss = XML::RSS->new(version => '2.0');

for my $module (
	[ 'media', 'http://search.yahoo.com/mrss/' ],
	[ 'dc',    'http://purl.org/dc/elements/1.1/' ],
) {
	my ($prefix, $uri) = @$module;
	$rss->add_module(prefix => $prefix, uri => $uri);
}

# Dublin Core metadata attached to the channel.
my %channelDc = (
	date     => '2011-10-15T00:00+00:00',
	subject  => "News",
	creator  => 'malattia@gmx.net',
	language => 'it',
);

$rss->channel(
	title       => "RepubblicaTV",
	link        => "http://tv.repubblica.it",
	description => "An RSS feed for RepubblicaTV videos",
	generator   => "+mala's perl script",
	dc          => \%channelDc,
);

# Download the listing page; the script cannot continue without it.
print "Connecting to $URL\n" if $DEBUG;
my $page = get($URL);
die ("Can't connect to $URL\n\n") unless $page;

# Each <div class="mediaItem"> block of the listing is handed to getBaseInfo
# as one candidate news item.
for my $snippet ($page =~ /<div class="mediaItem">(.*?)<\/div>/gsi) {
	my %newsItem;
	getBaseInfo (\%newsItem, $snippet);

	if (isValidUrl($newsItem{'URL'})) {
		# getDetailedInfo connects to $newsItem{'URL'} to get the video URL and a description
		getDetailedInfo (\%newsItem);

		# only show the item if the video URL was found
		if ($newsItem{'videoURL'}) {
			dumpNewsItem (\%newsItem) if $DEBUG;
			addNewsItem (\%newsItem, $rss);
		}
	}
}

# Keep the rendered feed for an hour, then send it to the client.
$cache->set($URL, $rss->as_string, "60 minutes");

print $rss->as_string;
exit 0;

# getBaseInfo(\%item, $snippet)
# Extracts the link, title and thumbnail from one listing snippet and stores
# them in the hash as 'URL' ($BASEURL prepended to the href), 'Title' and
# 'Thumb'. The hash is left untouched when the snippet does not match.
sub getBaseInfo {
	my ($newsRef, $snippet) = @_;

	my ($href, $title, $thumb) =
		$snippet =~ /a href="([^"]+)".*?title="([^"]+)".*?src="([^"]+)"/si;
	return unless defined $href;

	$newsRef->{'URL'}   = $BASEURL.$href;
	$newsRef->{'Title'} = $title;
	$newsRef->{'Thumb'} = $thumb;
}

# getDetailedInfo(\%item)
# Downloads the page at $item{URL} and, for each piece of markup that is
# found, fills in:
#   videoURL    - the quoted value following 'pcUrl'
#   description - content of the vi_abstract div, with tags stripped
#   date        - the parenthesised date in the date-player paragraph,
#                 passed through dateConvert
# If the page cannot be downloaded the hash is left unchanged, so the caller
# sees no videoURL for the item.
sub getDetailedInfo {
	my ($newsRef) = @_;
	
	my $content = get ($$newsRef{'URL'});

	# get() yields undef on failure; stop here rather than running the
	# pattern matches below against an undefined value
	return unless defined $content;

	# get video URL	
	if ($content =~ /'pcUrl',\s*'([^']+)'/){
		$$newsRef{'videoURL'} = $1;
	}

	# get description
	if ($content =~ /<div id="vi_abstract">\s*(.*)\s*<\/div>/){
		$$newsRef{'description'} = $1;
		# drop any markup left inside the abstract
		$$newsRef{'description'} =~ s/(<[^<]+>)//gsi;
	}

	# get date
	if ($content =~ /<p class="date-player">\(([^<]+)\)</){
		$$newsRef{'date'} = dateConvert($1);
	}
}

# dumpNewsItem(\%item)
# Debug helper: prints a separator line followed by one "label: value" line
# per field of the item, then an empty line.
sub dumpNewsItem {
	my ($newsRef) = @_;

	# output label => key in the item hash, in print order
	my @rows = (
		[ "URL  : ", 'URL' ],
		[ "Title: ", 'Title' ],
		[ "Thumb: ", 'Thumb' ],
		[ "Video: ", 'videoURL' ],
		[ "Descr: ", 'description' ],
		[ "Date : ", 'date' ],
	);

	print "--------------------------------------------\n";
	for my $row (@rows) {
		my ($label, $key) = @$row;
		print $label . $newsRef->{$key} . "\n";
	}
	# blank line closing the record
	print "\n";
}

# isValidUrl($url)
# Returns 1 for a usable item URL, 0 otherwise. A URL is rejected when it is
# empty/undefined or when it contains "/http://", which is what results from
# getBaseInfo prepending the site root to an href that was already absolute.
sub isValidUrl {
	my ($url) = @_;

	if ($url && $url !~ /\/http:\/\//) {
		return 1;
	}

	return 0;
}

# dateConvert($oldDate)
# Converts a date written as "<day> <italian month name> <year>" (for example
# "15 ottobre 2011") into "YYYY-MM-DDT00:00+00:00".
# The month name is looked up case-insensitively in %months, and leading or
# repeated whitespace between the parts is tolerated.
# Returns undef (in scalar context) when the input is missing a part, the day
# does not start with digits, or the month name is unknown.
# NOTE(review): RSS 2.0 specifies RFC 822 dates for pubDate; the existing
# output format is kept here so current consumers are not affected.
sub dateConvert {
	my ($oldDate) = @_;
	return unless defined $oldDate;

	# split ' ' skips leading whitespace and collapses runs of whitespace
	my ($day, $month, $year) = split (' ', $oldDate);
	return unless defined $year;

	# use only the leading digits of the day
	my ($dayNum) = $day =~ /\A([0-9]+)/;
	return unless defined $dayNum;

	my $monthNum = $months{lc $month};
	return unless $monthNum;

	return sprintf ("%s-%02d-%02dT00:00+00:00", $year, $monthNum, $dayNum);
}

# addNewsItem(\%item, $rss)
# Appends one entry to the feed object via its add_item method.
#   \%item - hash with 'Title', 'URL', 'description', 'date', 'Thumb' and
#            'videoURL' as filled in by getBaseInfo/getDetailedInfo
#   $rss   - feed object the entry is added to
# The thumbnail and the video itself are attached through the 'media' module:
# a 100x100 thumbnail and a content element typed video/mp4.
sub addNewsItem {
	my ($newsRef, $rss) = @_;

	$rss->add_item (
		title       => $$newsRef{'Title'},
		link        => $$newsRef{'URL'},
		description => $$newsRef{'description'},
		pubDate     => $$newsRef{'date'},
		media => {
			# the attribute is "height"; it was previously misspelled "heigth"
			thumbnail => {url => $$newsRef{'Thumb'}, height => "100", width => "100"},
			content => { url => $$newsRef{'videoURL'}, type => "video/mp4"},
		}
	);
}