#!/usr/bin/perl
#
# Downloads every sudoku linked from the dailysudoku.co.uk archive page,
# extracts the puzzle digits and caption from each PDF, and writes them as
# LaTeX into $OUTFILE by filling the ###SUDOKI### placeholder of $TPLFILE.
#
# The emitted LaTeX uses a "sudoku" environment; the template is presumably
# expected to load a package that provides it (verify against tpl_sudoku.tex).
#
use strict;
use warnings;

my $URL     = 'http://www.dailysudoku.co.uk/cgi-bin/sudoku/archive.pl';
my $TPLFILE = 'tpl_sudoku.tex';   # name of the template used to generate tex file
my $OUTFILE = 'sudoki.tex';       # name of the output tex file
my $SLEEP   = 2;                  # sleep 2 seconds between GETs

# Layout constants used to map PDF text positions to grid cells.
# NOTE(review): presumably the left edge, bottom edge and cell pitch of the
# grid in PDF units -- the values are taken unchanged from the original code.
my $GRID_LEFT   = 189.45;
my $GRID_BOTTOM = 458;
my $CELL_SIZE   = 30;
my $GRID_DIM    = 9;

# State shared between toLatex calls: true when the next sudoku emitted must
# be followed by a page break (i.e. it is the second one on the page).
my $flag = 0;

$| = 1;                           # do not buffer output

# Run the download only when executed as a script, so the helper subs below
# can be loaded (e.g. for testing) without touching the network.
unless (caller) {
    main();
    exit;
}

# --------------------------------------------------------------------------
# main downloads all sudokus from the archive, converts them to LaTeX and
# writes the filled-in template to $OUTFILE. Dies on any download or file
# error.
#
sub main {
    # Loaded at run time so the pure helper subs can be used without the
    # CPAN dependency being installed.
    require WWW::Mechanize;

    # create a new mech
    my $mech = WWW::Mechanize->new();

    # download the archive page
    my $res = $mech->get($URL);
    die "Error downloading archive page\n" unless $res->is_success;

    # The link list is computed once, before $mech navigates away from the
    # archive page inside the loop.
    my @links = reverse $mech->find_all_links(
        url_regex => qr|/sudoku/archive/.*?shtml|,
    );

    # follow all links and download sudokus
    my @sudokistack;
    for my $link (@links) {
        sleep $SLEEP;
        my $url_sudo = $link->url_abs;
        print "Working on $url_sudo:\n";

        print "\tDownloading url... ";
        $res = $mech->get($url_sudo);
        die "Error downloading url\n" unless $res->is_success;

        print "Submitting form... ";
        $res = $mech->submit_form;
        die "Error submitting form\n" unless $res->is_success;

        print "Downloading pdf... ";
        $res = $mech->follow_link(text => "click here");
        die "Error downloading pdf\n"
            unless (defined($res) && $res->is_success);

        # in $pag_pdf you have the sudoku in pdf format
        my $pag_pdf = $res->content;
        my $caption = _extract_caption($pag_pdf);
        my @sudoku  = _extract_grid($pag_pdf);

        push @sudokistack, toLatex(toText(@sudoku), $caption);
        print "Ok\n";
    }

    # create the string containing all the sudokus
    # (done here, after the loop, so that other operations can still be
    # executed on the stack beforehand if desired)
    my $sudoki = join '', @sudokistack;

    # load template file
    open(my $in, '<', $TPLFILE)
        or die "Could not open template file $TPLFILE!\n\n";
    my $TPL = do { local $/; <$in> };
    $TPL = '' unless defined $TPL;    # empty template file
    close $in;

    $TPL =~ s/###SUDOKI###/$sudoki/;

    open(my $out, '>', $OUTFILE)
        or die "Could not open output file $OUTFILE: $!\n";
    print {$out} $TPL;
    # Buffered write errors only surface at close, so check it.
    close $out
        or die "Could not write output file $OUTFILE: $!\n";

    return;
}

# --------------------------------------------------------------------------
# _extract_caption receives the raw pdf content and returns the caption as
# "SECOND (FIRST)", built from the first two text strings placed with a
# "435 Td" operator. Returns an empty string when the pattern is not found.
#
sub _extract_caption {
    my ($pdf) = @_;

    if ($pdf =~ /435 Td \((.*?)\) .*?435 Td \((.*?)\)/si) {
        return "$2 ($1)";
    }
    return '';
}

# --------------------------------------------------------------------------
# _extract_grid receives the raw pdf content and returns the sudoku as a
# list of row references (row 0 first), each holding the digits by column.
# Cells without a digit, and rows without any digit, are left undefined.
#
sub _extract_grid {
    my ($pdf) = @_;
    my @grid;

    while ($pdf =~ /20 Tf ([^\s]+) ([^\s]+) Td \((\d)\)/gi) {
        my ($pos_x, $pos_y, $digit) = ($1, $2, $3);

        # Round to the nearest cell: array subscripts are truncated, so a
        # floating point result such as 2.9999999 would otherwise land in
        # the wrong cell.
        my $col = _round(($pos_x - $GRID_LEFT) / $CELL_SIZE);
        # The vertical index is mirrored (8 - n) so that row 0 is the row
        # with the largest vertical coordinate.
        my $row = ($GRID_DIM - 1)
                - _round(($pos_y - $GRID_BOTTOM) / $CELL_SIZE);

        # Ignore digits that do not fall inside the 9x9 grid.
        next if $row < 0 || $row >= $GRID_DIM;
        next if $col < 0 || $col >= $GRID_DIM;

        $grid[$row][$col] = "$digit";
    }
    return @grid;
}

# --------------------------------------------------------------------------
# _round returns its numeric argument rounded to the nearest integer.
#
sub _round {
    my ($value) = @_;
    return sprintf('%.0f', $value) + 0;
}

# --------------------------------------------------------------------------
# function toText receives an array containing the sudoku (one array
# reference per row) and returns a string representing it: nine lines of
# the form "|c|c|c|c|c|c|c|c|c|.", where an empty cell is an empty string.
#
sub toText {
    # @sudoku is the 9x9 array containing the sudoku
    my @sudoku = @_;

    # $sudo is the string which will contain the sudoku once
    # converted in plain text
    my $sudo = "";
    for my $i (0 .. $GRID_DIM - 1) {
        my $row = $sudoku[$i] || [];          # a row may have no digits
        $sudo .= "|";                         # line starts here
        for my $j (0 .. $GRID_DIM - 1) {
            my $cell = defined $row->[$j] ? $row->[$j] : '';
            $sudo .= $cell . "|";             # sudoku cell
        }
        $sudo .= ".\n";                       # end of line
    }
    return $sudo;
}

# --------------------------------------------------------------------------
# toLatex function receives a sudoku in the string format returned by toText
# and a caption to insert below the sudoku. It then returns LaTeX text,
# containing the code needed to display the sudoku.
#
# Successive calls alternate the trailer: the first call appends an empty
# \verb|| separator, the next one a \pagebreak, and so on, so that a page
# break is inserted every 2 sudokus.
#
sub toLatex {
    my ($sudo, $caption) = @_;
    $caption = '' unless defined $caption;

    my $tex = "";
    $tex .= "\\begin{sudoku}\n$sudo\\end{sudoku}\n";            #sudoku
    $tex .= "\\begin{center}\n$caption\n\\end{center}\n";       #caption

    if ($flag) {
        # insert a pagebreak every 2 sudoku
        $tex .= "\n\\pagebreak\n\n";
        $flag = 0;
    }
    else {
        $tex .= "\n\\verb||\n\n";
        $flag = 1;
    }
    return $tex;
}

# --------------------------------------------------------------------------
# --------------------------------------------------------------------------