#!/usr/bin/perl -w
#******************************************************************************
#
#  Showtimes - Movie showtimes for the Palm Computing Platform
#
#  scoot.pm - a module to parse http://cinema.scoot.co.uk/
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#
#*******************************************************************************

#
# Update by Remy Sharp (05-04-2001)
#

package scoot;

use strict;
use Time::Local;

# contructor
sub new {
	my $proto = shift;
	my $class = ref ( $proto ) || $proto;
	my $self = {};
	bless ( $self, $class );
	return $self;
}


# understands
#
# takes a URL and decides if this parser knows how to parse
# the URL
#
sub understands
{
	#General form = http://www.scoot.co.uk/cinemafinder/cinlistings.asp?a=AREACODE&ae=POSTCODE&cc=&c=&ce=&
	my ( $self, $url ) = @_;
	
	if ( $url =~ m|http://www.scoot.co|i ) {
		return 1;
	}
	
	return 0;
}

# copyright
#
# returns the copyright statement to be used, to identify
# the source of the data and keep everyone happy
#
sub copyright
{
	my ( $self ) = @_;
	
	my $thisyear = ( localtime )[5] + 1900;
	return "Data \251 $thisyear Scoot";
}

# parse
#
# takes a URL and is expected to do whatever it takes to
# parse it, i.e. request the necessary page(s) and then
# feed the data to the data module
#
sub parse
{
	my ( $self, $url ) = @_;

	my $content = main::grabpage ( $url, 0 );

	my %months = ( 'January'	=> 0,
			   'February'	=> 1,
			   'March'		=> 2,
			   'April'		=> 3,
			   'May'		=> 4,
			   'June'		=> 5,
			   'July'		=> 6,
			   'August'		=> 7,
			   'September'	=> 8,
			   'October'	=> 9,
			   'November'	=> 10,
			   'December'	=> 11
			 );

	my %rev_months = ( 1 => 'January',
               2=> 'February'   ,
               3=> 'March'      ,
               4=> 'April'      ,
               5=> 'May'        ,
               6=> 'June'       ,
               7=> 'July'       ,
               8=> 'August'     ,
               9=> 'September'  ,
               10=> 'October'    ,
               11=>'November'    ,
               12=>'December'
			 );

	my %days =   ( 'mo' => 0,
			   'tu' => 1,
			   'we' => 2,
			   'th' => 3,
			   'fr' => 4,
			   'sa' => 5,
			   'su' => 6
			 );

	my %dayid_by_dayname;
	my %dayid_by_daytime;

	# Translate linefeeds
	$content =~ tr/\015/\012/;

	#Get array of lines with cinema links
	#print $content;
	my @lines = grep( m|text-decoration|,  split /\012/, $content );
	die "Could find no theatres!\n" unless (@lines);

	my %theatre_count=();
	THEATRE:
	foreach my $linkline (@lines) {
		die "Could not match linkline.\n" unless ($linkline =~ /<a href="(.*)"><font style="text-decoration: none;".*><b>(.*)<\/b><\/font>/);
		my $href = "http://cinema.scoot.co.uk/cinemafinder/" . $1;
		my $theatrename = $2;
		#Check if another cinema with same name (e.g. Odeon Richmond, Surrey)
		if ( ++$theatre_count{$theatrename} > 1 ) { #If duplicate add (2), (3), etc.. to name
			$theatrename .= " ($theatre_count{$theatrename})";
		}
		$href =~ s/&amp;/&/; #Perform &amp; to & substitution in link
		if (main::IgnoringTheater ($theatrename)) {
			print "Ignoring $theatrename.\n";
			next THEATRE;
		}
		print "Downloading $theatrename: ";
		#Get page for this theatre
		$content = main::grabpage ( $href, 0 );
		$content =~ tr/\015/\012/; #Translate linesfeeds..

		#Now look for the valid dates and start adding dates to the database.
		die "Unrecognised location!\n" if ($content =~ /unrecognised location/i );

		#if ($content =~ /Valid from (\w+), (\w+) (\d+), (\d+) to \w+, \w+ \d+, \d+/) {
		#if ($content =~ /Valid from (\d+)\/(\d+)\/(\d+) to (\d+)\/(\d+)\/(\d+)/) {
        #if ($content =~ /<font class="smalltext">\(From (\d+) (\w+) (\d+) to (\d+) (\w+) (\d+)\)<\/font>/s) {

        # RS - 2002-05-21
        # We're going to have to force the date because Scoot have removed the dates
        # from their web site.  I'm going to do this by working out when last Thursday
        # was - becasue this is when the films are released.

        my @now = localtime ();
        my $days_since_thurs = (3 + $now[6]) % 7;   # 3 is the index of thurs, and 7 days in a week

        my $today = date_today();
        my $last_thurs = date_plus($today, -$days_since_thurs);


			# Process date line
			#my ($day,$month,$year)=($1,$2,$3);   # <- this needs to be the last thursday


            my ($year,$month,$day)=split(/-/, $last_thurs);   # <- this needs to be the last thursday

            # just being lazy here - but I can't be bothered to change all of the code
            # referencing $month!
            $month =~ s/^0//;
            $month = %rev_months->{$month};


			my $basetime = timegm ( 0, 0, 0, $day, %months->{$month}, $year );
			my $baseday = ((localtime $basetime)[6] -1) % 7; #Get the day of week when the valid from starts, Mon = 0
			#Buld a set of times for each day of the week to match later
			foreach my $dayname ( keys %days ) {
				my $daytime = $basetime +
				(((7 + $days{$dayname} - $baseday) % 7) * (24*60*60));

				my @datebits = gmtime ($daytime);
				#Check if date in question was already added
				if (exists($dayid_by_daytime{$daytime})) {
					#Already have ID for that date so don't create another
					$dayid_by_dayname{$dayname} = $dayid_by_daytime{$daytime};
				} else {
					#Date not added yet, so create a new date
					my $this_id = main::AddDate (sprintf ("%04d-%02d-%02d",
										   $datebits[5]+1900,
										   $datebits[4]+1,
										   $datebits[3]));
					$dayid_by_dayname{$dayname} = $this_id;
					$dayid_by_daytime{$daytime} = $this_id;
				}
			}
		#} else {
		#	print "Unable to understand valid dates. Skipping\n";
		#	next THEATRE;
		#}
		
		if ($content =~ /no films showing/i) {
			print "No films showing at this cinema. Skipping.\n";
			next THEATRE;
		}

		#Get address and phone number:
		my $theatre_info="";
		#Look for address and tel number together
		my $tel = "";
		if ($content =~ /<b>$theatrename<\/b><\/font>(.*)<\/table>.*tel: ([\d\s]*)<\/font><\/td>\s*<td><img/is) {
			my $address = $1;
			$tel = $2;
			$address =~ s/<br>/\012/gis; #Turn BRs to newlines
			$address =~ s/<[^>]*>//gs; #get rid of html tags
			$address =~ s|^\s*\012||gm; #Get rid of blank newlines.
			$address =~ s|^\s*||gm; #Get rid of leading space
            $address =~ s/Email to a friend//si; # junk text included in the table
			$address =~ s/Print preview//si;

            $address = ucallwords ($address);

			$theatre_info .= $address;
			#print "Address: $address\n";
		}
		#Look for phone number
		if ($tel) { #$content =~ /Tel: (.*)<\/font><\/td>/si)
			$theatre_info .= "Tel: $tel";
			#print "\nTel: $tel\n";
		}

		# Find Movies. Note use of minimal (*?) matching.
		#Mark the start of movie with _MOVIE_
		$content =~ s|<td colspan="3"><font size="2" face="verdana" color="#660099"><b><a href=".*?">(.*?)</a>&nbsp;</b>|_MOVIE_$1\012|gs;
		# Remove everything up to first movie
		$content =~ s/^.*?_MOVIE_/_MOVIE_/s;
		# Remove the HTML
		$content =~ s/<[^>]*>//gs;

        # Remove film reviews
        $content =~ s/film reviews//gsi;

		# Remove blanks
		$content =~ s/&nbsp;?//gs;
		# Nuke blank lines
		$content =~ s|^\s*\012||gm;


		my @movies = split /_MOVIE_/, $content;
		shift @movies; #Get rid of 1st duff entry
		my $n_mov = scalar(@movies);
		print "(Showing $n_mov movie", ($n_mov == 1) ? "" : "s", ")\n";

		my $TID = main::AddTheater ($theatrename);
		#TODO Add in all theatre information (not just address and phone)
		main::AddTheaterInfo ($TID, $theatre_info) if ($theatre_info);

		my $movie;
		MOVIE:
		foreach $movie (@movies) {
			my ($moviename, $cert, $showtimes);
			if ($movie =~ /\s*(.*)\s*\012\s*\((.*)\)\s*\012\s*(.*)\s*\012/) {
				($moviename, $cert, $showtimes) = ($1,$2,$3);
			} elsif ($movie =~ /\s*(.*)\s*\012\s*(.*)\s*\012/) {
				#Old films sometimes have no cert info (rather than unknown)
				($moviename, $cert, $showtimes) = ($1,"Unknown/No Cert",$2);
			} else {
				#$movie =~ s/\012/@\012/gsm; #Show newlines by preceeding with @
				print "Can't understand movie info, skipping:\n$movie\n";
				next MOVIE;
			}

            #print qq{\t$moviename\n\t$cert\n\t$showtimes\n\n};
            #sleep 4;

			#Change initial letters to upper case
			$moviename = ucwords ($moviename);
			
			my $MID = main::AddMovie ($moviename);
			my $movieinfo = "Cert: $cert";
			main::AddMovieInfo ( $MID, $movieinfo );

			my @days_times = split /\s*,\s*/, $showtimes; #Split Fri, Sat, 12:35PM, 7:30:PM
			while (@days_times) {
				my @dayIDs=(); #Clear array
				#Compile a list of day IDs
				while ( (@days_times) and $days_times[0]=~ /^[A-Z]/ ) { #While first element is a day
					my $this_day = shift @days_times; #Remove the 1st element
					my $short_day = lc (substr ($this_day, 0, 2)); #Two char day name
					push @dayIDs, $dayid_by_dayname{$short_day};
				}
				#Compile a string of times
				my $time_info = "";
				while ( (@days_times) and $days_times[0]=~ /^\d+:\d+\w+/ ) { #While first element is a a time e.g. 12:45PM
					$time_info .= ", " if ($time_info); #Add a coma unless it's the 1st item
					$time_info .= shift @days_times;
				}
				if ( (@dayIDs) and $time_info ) {#Need at least 1 day and time info
					#print "\t\tAdding showtimes: $time_info\n";
					foreach my $DID (@dayIDs) {
						main::AddShowtimes ($DID, $TID, $MID, $time_info);
					}
				} else {
					print "Couldn't find suitable day and time information ($moviename). Skipping.\n";
				}
			}
		} #End of loop over movies
	}#End of loop over theatres

}

sub ucwords ($)
{
  my $result=lc shift;
  $result =~ s/^(\w)/uc($1)/es;
  $result =~ s/([ .,;:+!\"-\(]+)(\w)/$1.uc($2)/ges;
  $result =~ s/(\')(\w)/$1.lc($2)/ges;
  return $result;
}

sub ucallwords ($)
{
  my $result=lc shift;
  $result =~ s/\b(\w)/uc($1)/es;
  $result =~ s/([ .,;:+!\"-\(]+)(\w)/$1.uc($2)/ges;
  $result =~ s/(\')(\w)/$1.lc($2)/ges;
  return $result;
}

sub date_plus
{
	my ($date,$plus) = @_;

	my ($days) = days_since_epoch(split("-",$date));

	$days += $plus;
	$days = ($days*60*60*24) + (7*60*60);

	return date_as_string(localtime($days));
}



# $mon = 1-12
# $day = 1-31 ???
# return value is the number of days since the epoch.
sub days_since_epoch
{
	my($year,$mon,$day) = @_;

	my $days_since = 0;

	if (not defined($day))
	{
		my ($mday,$mon,$year) = date_today();
		# this is done so that timelocal can work properly
		$mon--;

		$days_since = timelocal(00,00,00,$mday,$mon,$year); #time();
	}
	else
	{
		$days_since = timelocal(00,00,00,$day,$mon-1,$year);
	}

	$days_since = sprintf "%10.0f", ($days_since / (60*60*24));

	return $days_since;
}



# returns date today with no inputs, and $mon is rtned as 1-12;
sub date_today
{

	my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);

	my $len = length($year);
	$year = substr $year, 2;
	if ($year > 20 and $year < 1900)
	{
		$year += 1900;
	}
	elsif ($year <= 20)
	{
		$year += 2000;
	}

	$mday = length($mday) == 2 ? $mday : "0$mday";
	++$mon;

	return ($mday,$mon,$year);

}

sub date_as_string #gmtime_as_string
{
	my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = @_;

	# Tue Nov 30 00:00:00 1999

	$mon = length($mon) == 2 ? $mon : "0$mon";
	$mon++;

	if ($year > 20 and $year < 1900)
	{
		$year += 1900;
	}
	elsif ($year <= 20)
	{
		$year += 2000;
	}

	my $date = "$year-$mon-$day";

	return ($date);

}

1;
