· 6 years ago · May 23, 2019, 06:52 PM
1#!/usr/bin/perl -w
2
3=pod
4
5=head1 NAME
6
7tv_grab_il - Grab TV listings for Israel.
8
9=head1 SYNOPSIS
10
11tv_grab_il --help
12
13tv_grab_il --version
14
15tv_grab_il --capabilities
16
17tv_grab_il --description
18
19
20tv_grab_il [--config-file FILE]
21 [--days N] [--offset N] [--slow]
22 [--output FILE] [--quiet] [--debug]
23
24tv_grab_il --configure [--config-file FILE]
25
26tv_grab_il --configure-api [--stage NAME]
27 [--config-file FILE]
28 [--output FILE]
29
30tv_grab_il --list-channels [--config-file FILE]
31 [--output FILE] [--quiet] [--debug]
32
33=head1 DESCRIPTION
34
35Output TV listings in XMLTV format for many channels available in Israel.
36The data comes from tv-guide.walla.co.il.
37
38First you must run B<tv_grab_il --configure> to choose which channels
39you want to receive.
40
41Then running B<tv_grab_il> with no arguments will get listings in XML
42format for the channels you chose for available days including today.
43
44=head1 OPTIONS
45
46B<--configure> Prompt for which channels to download and write the
47configuration file.
48
49B<--config-file FILE> Set the name of the configuration file, the
50default is B<~/.xmltv/tv_grab_il.conf>. This is the file written by
51B<--configure> and read when grabbing.
52
53B<--output FILE> When grabbing, write output to FILE rather than
54standard output.
55
56B<--days N> When grabbing, grab N days rather than all available days.
57
58B<--offset N> Start grabbing at today + N days. N may be negative.
59
60B<--slow> Get programme descriptions as well as title. (Will take a
61long time with lots of channels selected.)
62
63B<--quiet> Suppress the progress-bar normally shown on standard error.
64
65B<--debug> Provide more information on progress to stderr to help in
66debugging.
67
68B<--list-channels> Write output giving <channel> elements for every
69channel available (ignoring the config file), but no programmes.
70
71B<--capabilities> Show which capabilities the grabber supports. For more
72information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>
73
74B<--version> Show the version of the grabber.
75
76B<--help> Print a help message and exit.
77
78=head1 ERROR HANDLING
79
80If the grabber fails to download data for some channel on a specific day,
81it will print an error message to STDERR and then continue with the other
82channels and days. The grabber will exit with a status code of 1 to indicate
83that the data is incomplete.
84
85=head1 ENVIRONMENT VARIABLES
86
87The environment variable HOME can be set to change where configuration
88files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
89it might be necessary to set HOME to a path without spaces in it.
90
91=head1 SUPPORTED CHANNELS
92
93For information on supported channels, see http://tv-guide.walla.co.il/
94
95=head1 AUTHOR
96
97lightpriest. This documentation and parts of the code are
98based on various other tv_grabbers from the XMLTV-project.
99
100=head1 SEE ALSO
101
102L<xmltv(5)>.
103
104=cut
105
106use strict;
107use Encode;
108use XMLTV::Options qw/ParseOptions/;
109use XMLTV::ProgressBar;
110use XMLTV::Configure::Writer;
111use XMLTV::Get_nice qw(get_nice_tree);
112
113use POSIX qw(strftime);
114use DateTime;
115
116
117# only used while testing the 'slow' option
118##$XMLTV::Get_nice::Delay = 0;
119##use HTTP::Cache::Transparent;
120##HTTP::Cache::Transparent::init( {
121## BasePath => '/root/.xmltv/cache',
122## NoUpdate => 60*60, # cache time in seconds
123## MaxAge => 24, # flush time in hours
124## Verbose => 1,
125##} );
126
127
# Patterns describing the per-channel links on the Walla front page.  They
# are kept as plain strings and interpolated into regexes where they are
# used; the second pattern additionally captures the numeric channel id.
my $channel_link_regexp = "\\?w=\\/\\/\\/[0-9]*\\/\\/[A-Za-z]*\\/1";
my $channel_link_id     = "\\?w=\\/\\/\\/([0-9]*)\\/\\/[A-Za-z]*\\/1";

# Hand the command line to XMLTV::Options::ParseOptions, which takes care of
# the basic capabilities that a tv_grabber should provide, and keep the
# option and configuration hashes it returns.
my ($opt, $conf) = ParseOptions(
    {
        grabber_name     => "tv_grab_il",
        description      => "Israel (tv-guide.walla.co.il)",
        version          => '$Id: tv_grab_il,v 1.28 2015/06/28 08:40:31 knowledgejunkie Exp $',
        capabilities     => [qw/baseline manualconfig apiconfig/],

        # Callbacks implemented further down in this file.
        stage_sub        => \&config_stage,
        listchannels_sub => \&write_channels,

        # --slow: grab descriptions from sub-page
        extra_options    => [qw/slow/],
    }
);
141
# Configuration-stage callback registered with ParseOptions as stage_sub.
#
# Parameters:
#   $stage - name of the configuration stage being requested
#   $conf  - configuration gathered so far (not used by this grabber)
#
# Returns the XML text produced by XMLTV::Configure::Writer for the stage.
# Dies with "Unknown stage <name>" for anything other than "start", the only
# stage this grabber implements.
sub config_stage {
    my ($stage, $conf) = @_;

    # Treat a missing stage name like any other unknown stage instead of
    # tripping an "uninitialized value" warning in the comparison.
    $stage = '' unless defined $stage;
    die "Unknown stage $stage" unless $stage eq "start";

    my $result;
    my $writer = XMLTV::Configure::Writer->new(
        OUTPUT   => \$result,
        encoding => 'utf-8',
    );

    # The "start" stage asks no questions: it is opened and immediately
    # closed, passing 'select-channels' to end().
    $writer->start({ 'generator-info-name' => 'tv_grab_il' });
    $writer->end('select-channels');

    return $result;
}
153
# Download the Walla TV-guide front page and build the channel table.
#
# Parameters:
#   $opt  - option hash; only {quiet} and {debug} are read here (either one
#           suppresses the progress bars)
#   $conf - configuration hash (accepted but not used by this sub)
#
# Returns a hash reference keyed by "<id>.tv-guide.walla.co.il".  Each value
# holds id, 'display-name', url and, when a logo image was found, icon, in
# the layout that write_channels() passes to XMLTV::Writer.
sub fetch_channels {
    my ($opt, $conf) = @_;

    my $channels = {};

    # Declare $bar on its own: "my $x = ... unless COND" leaves the variable
    # in an undefined state according to perlsyn.
    my $show_progress = !( $opt->{quiet} || $opt->{debug} );
    my $bar;

    $bar = XMLTV::ProgressBar->new({
        name  => "Fetching channels",
        count => 1,
    }) if $show_progress;

    # Get the page containing the list of channels.  Only text links are
    # wanted here, so anchors that wrap an <img> are filtered out.
    my $tree = XMLTV::Get_nice::get_nice_tree('http://tv-guide.walla.co.il', undef, 'windows-1255');
    my @channels = $tree
        ? $tree->look_down(
            "_tag", "a",
            "href", qr/$channel_link_regexp/,
            sub { !$_[0]->look_down('_tag', 'img') }
          )
        : ();

    # Finish the bar unconditionally; chaining the calls with && would skip
    # finish() whenever update() happened to return a false value.
    if (defined $bar) {
        $bar->update();
        $bar->finish();
        undef $bar;
    }

    $bar = XMLTV::ProgressBar->new({
        name  => "Parsing result",
        count => scalar @channels,
    }) if $show_progress;

    # Browse through the downloaded list of channels and map them to a hash
    # XMLTV::Writer would understand.
    foreach my $channel (@channels) {
        my $name = $channel->as_text();
        my ($id) = $channel->attr('href') =~ /$channel_link_id/;

        # Skip links without visible text or without a usable numeric id.
        if ($name && defined $id && length $id) {

            # Try to fetch the icon: look in the sibling to the right of the
            # link's parent for another channel link that wraps an <img>.
            my $icon_src;
            my $cell    = $channel->parent();
            my $sibling = $cell ? $cell->right : undef;

            # right() may hand back a bare text node (a plain string), on
            # which look_down() cannot be called.
            if (ref $sibling) {
                my $logo_link = $sibling->look_down('_tag', 'a', 'href', qr/$channel_link_regexp/);
                my $img       = $logo_link ? $logo_link->look_down('_tag', 'img') : undef;
                $icon_src = $img->attr('src') if $img;
            }

            my $xmltv_id = "$id.tv-guide.walla.co.il";
            $channels->{$xmltv_id} = {
                id             => $xmltv_id,
                'display-name' => [[ encode('utf-8', $name) ]],
                url            => [ $channel->attr('href') ],
            };
            $channels->{$xmltv_id}->{icon} = [ { src => $icon_src } ] if $icon_src;
        }

        $bar->update() if defined $bar;
    }

    if (defined $bar) {
        $bar->finish();
        undef $bar;
    }

    # Everything needed has been copied out as plain strings, so the parse
    # tree can be released explicitly.
    $tree->delete if $tree;

    # Notifying the user :)
    $bar = XMLTV::ProgressBar->new({
        name  => "Reformatting",
        count => 1,
    }) if $show_progress;

    if (defined $bar) {
        $bar->update();
        $bar->finish();
    }

    return $channels;
}
214
# --list-channels callback registered with ParseOptions as listchannels_sub.
#
# Parameters (as passed by the caller of the callback):
#   $cb_conf - configuration hash
#   $cb_opt  - option hash
#
# Returns an XMLTV document, as a string, containing one <channel> element
# for every channel found by fetch_channels() and no programmes.
sub write_channels {
    my ($cb_conf, $cb_opt) = @_;

    # This callback runs from inside ParseOptions, i.e. before the file-level
    # "my ($opt, $conf) = ParseOptions(...)" assignment has completed, so the
    # file-level variables are still undefined at that point.  Prefer the
    # arguments handed to the callback and only fall back to the file-level
    # variables when called without arguments.
    # NOTE(review): the ($conf, $opt) argument order follows the
    # XMLTV::Options documentation for listchannels_sub - confirm.
    $cb_opt  = $opt  unless defined $cb_opt;
    $cb_conf = $conf unless defined $cb_conf;

    my $channels = fetch_channels($cb_opt, $cb_conf);

    # Let XMLTV::Writer format the results as a valid xmltv file
    my $result;
    my $writer = XMLTV::Writer->new(OUTPUT => \$result, encoding => 'utf-8');
    $writer->start({ 'generator-info-name' => 'tv_grab_il' });
    $writer->write_channels($channels);
    $writer->end();

    return $result;
}
227
# ---------------------------------------------------------------------------
# Main grab: download listings for every configured channel and day, then
# write the collected data out through XMLTV::write_data.
# ---------------------------------------------------------------------------

# Fetch the channels again to see what's available, so that configured
# channels which have disappeared from the site can be skipped.
my $channels = fetch_channels($opt, $conf);

# Configure initial elements for XMLTV::Writer
#
# Create a new hash for the channels so that channels without programmes
# won't appear in the final XML
my $encoding = 'UTF-8';
my $credits = {'generator-info-name' => 'tv_grab_il'};
my $w_channels = {};
my $programmes = [];

# This script treats a --days value of -999 as "not specified" (see the
# write_data arguments at the bottom).  Looping over that raw value would
# fetch nothing at all, so fall back to one week.
# NOTE(review): seven is chosen because the listing URL is keyed by weekday
# name, so presumably no more than seven distinct days exist - confirm.
my $days_to_grab = ($opt->{days} == -999) ? 7 : $opt->{days};
$days_to_grab = 0 if $days_to_grab < 0;

my @wanted_channels = @{ $conf->{channel} || [] };

# Progress Bar :)
# Declared separately from the conditional assignment because
# "my $x = ... unless COND" is undefined behaviour per perlsyn.
my $bar;
$bar = XMLTV::ProgressBar->new({
    name  => "Fetching channels listings",
    count => scalar(@wanted_channels) * $days_to_grab,
}) unless ($opt->{quiet} || $opt->{debug});

# Fetch listings per channel
CHANNEL:
foreach my $channel_id (@wanted_channels) {

    # Check each channel still exists in walla's channels page
    next CHANNEL unless $channels->{$channel_id};

    my ($walla_id) = ($channel_id =~ /^([0-9]*)\..*$/);

    # Now grab listings for each channel on each day, according to the
    # options in $opt
    DAY:
    foreach my $day_offset ($opt->{offset} .. $opt->{offset} + $days_to_grab - 1) {

        # Take "today" directly in Israeli local time.  Computing it in UTC
        # and converting afterwards yields the previous calendar day during
        # the first hours after local midnight.
        my $theday = DateTime->today(time_zone => 'Asia/Jerusalem')->add(days => $day_offset);
        my $url = "http://tv-guide.walla.co.il/?w=/4//$walla_id//" . $theday->day_name() . "/1";

        my $tree = XMLTV::Get_nice::get_nice_tree($url, undef, 'windows-1255');

        if ($tree) {
            my @shows = $tree->look_down('_tag', 'table', 'width', '100%', 'dir', 'ltr', 'cellpadding', '2', 'border', '0');
            print STDERR "No programmes found at $url\n" if $opt->{debug} && !@shows;

            SHOW:
            foreach my $show (@shows) {
                my $title = $show->look_down('_tag', 'a', 'class', 'w3b');
                my $show_hour_element = $show->look_down('_tag', 'span', 'class', 'w3b txt-w');

                # The table filter above is loose; skip tables that lack a
                # title link or a start time instead of dying on them.
                next SHOW unless $title && $show_hour_element;

                my ($hour, $minute) = $show_hour_element->as_text() =~ /(\d{1,2}):(\d\d)/;
                next SHOW unless defined $minute;

                # Start times before 06:00 are placed on the following
                # calendar day (same boundary as the '060000' cutoff below).
                my $show_time = $theday->clone();
                $show_time->add(days => 1) if $hour < 6;
                $show_time->set(hour => $hour, minute => $minute, second => 0);

                # If user wants descriptions then get them from the sub-page
                # (can also get stop time while we're there!)
                my $stop_time;
                my $desc = '';
                if ($opt->{slow}) {
                    my $href = $title->attr('href');
                    my $detailtree = defined $href
                        ? XMLTV::Get_nice::get_nice_tree('http://tv-guide.walla.co.il/' . $href, undef, 'windows-1255')
                        : undef;

                    if ($detailtree) {
                        my $stop;

                        # Only the first "wp-0-b" table is inspected.
                        if (my $table = $detailtree->look_down('_tag', 'table', 'class', 'wp-0-b')) {
                            if (my $td = $table->look_down('_tag', 'td', 'class', 'w2b', sub { !$_[0]->look_down('_tag', 'h2') })) {
                                # Two HH:MM values are expected; the second is
                                # used as the stop time.  $stop stays
                                # undefined when the pattern does not match.
                                (undef, $stop) = $td->as_text() =~ /(\d\d:\d\d).*(\d\d:\d\d)/;
                            }
                            if (my $td = $table->look_down('_tag', 'td', 'class', 'w3')) {
                                $desc = $td->as_text();
                            }
                        }

                        # Release the detail page's parse tree explicitly.
                        $detailtree->delete;

                        if (defined $stop) {
                            my ($stop_hour, $stop_minute) = split(/:/, $stop);
                            $stop_time = $show_time->clone();
                            # assumes prog not last > 24hours!
                            $stop_time->add(days => 1) if $stop_hour < $hour;
                            $stop_time->set(hour => $stop_hour, minute => $stop_minute, second => 0);
                        }
                    }
                }

                my $prog = {
                    start   => $show_time->strftime("%Y%m%d%H%M%S %z"),
                    title   => [[ encode('utf-8', $title->as_text()) ]],
                    channel => $channel_id,
                };
                $prog->{'stop'} = $stop_time->strftime("%Y%m%d%H%M%S %z") if defined $stop_time;
                $prog->{'desc'} = [[ encode('utf-8', $desc) ]] if $desc ne '';
                push @{$programmes}, $prog;

                # Add this channel to the finalized XML now that it has at
                # least one programme.
                $w_channels->{$channel_id} ||= $channels->{$channel_id};
            }

            # All needed values were copied out as strings; free the tree.
            $tree->delete;
        }
        elsif ($opt->{debug}) {
            print STDERR "No page returned for $url\n";
        }

        $bar->update if defined $bar;
    }
}

if (defined $bar) {
    $bar->finish();
    undef $bar;
}

# Only restrict the written time window when the user asked for one; the
# original (possibly -999) --days value is deliberately used here.
my %w_args;

if (($opt->{offset} != 0) || ($opt->{days} != -999)) {
    $w_args{offset} = $opt->{offset};
    $w_args{days} = ($opt->{days} == -999) ? 100 : $opt->{days};
    $w_args{cutoff} = '060000';
}

my $data = [];
$data->[0] = $encoding;
$data->[1] = $credits;
$data->[2] = $w_channels;
$data->[3] = $programmes;

XMLTV::write_data($data, %w_args);