fetcher: make sure we don't download segments if keep=0
[hls-player:hls-player.git] / HLS / fetcher.py
1 # -*- Mode: Python -*-
2 # vi:si:et:sw=4:sts=4:ts=4
3 #
4 # Copyright (C) 2009-2010 Fluendo, S.L. (www.fluendo.com).
5 # Copyright (C) 2009-2010 Marc-Andre Lureau <marcandre.lureau@gmail.com>
6
7 # This file may be distributed and/or modified under the terms of
8 # the GNU General Public License version 2 as published by
9 # the Free Software Foundation.
10 # This file is distributed without any warranty; without even the implied
11 # warranty of merchantability or fitness for a particular purpose.
12 # See "LICENSE" in the source distribution for more information.
13
14 from itertools import ifilter
15 import logging
16 import os, os.path
17 import tempfile
18 import urlparse
19
20 from twisted.web import client
21 from twisted.internet import defer, reactor
22 from twisted.internet.task import deferLater
23
24 import HLS
25 from HLS.m3u8 import M3U8
26
class HLSFetcher(object):
    """Fetch an HTTP Live Streaming playlist and download its segments.

    The fetcher reloads the playlist, iterates over the files it lists and
    stores each downloaded segment under ``self.path``.  Saved segments are
    tracked in ``self._cached_files`` (sequence number -> local path) and
    old ones are removed according to ``self.n_segments_keep``:

    * ``-1`` disables eviction,
    * ``0`` means segments are fetched but never written to disk,
    * any other value ``n`` evicts sequences ``<= current - n``.
    """

    def __init__(self, url, options=None, program=1):
        """Set up the fetcher.

        url -- URL of the playlist to start from.
        options -- optional object providing ``path``, ``referer``,
            ``bitrate`` and ``keep`` attributes; defaults are used when
            omitted.
        program -- program id passed to the program playlist lookup.
        """
        self.url = url
        self.program = program
        if options:
            self.path = options.path
            self.referer = options.referer
            self.bitrate = options.bitrate
            self.n_segments_keep = options.keep
        else:
            self.path = None
            self.referer = None
            self.bitrate = 200000
            self.n_segments_keep = 3
        if not self.path:
            # no download directory requested: use a fresh temporary one
            self.path = tempfile.mkdtemp()

        self._program_playlist = None
        self._file_playlist = None
        self._cookies = {}
        self._cached_files = {}

        self._files = None # the iter of the playlist files download
        self._next_download = None # the delayed download defer, if any
        self._file_playlisted = None # the defer to wait until new files are added to playlist
        # the defer fired when the next segment is saved (or fails); it is
        # initialised here so that _got_file()/_got_file_failed() can be
        # reached before _start_get_files()/get_file() without raising
        # AttributeError
        self._new_filed = None

    def _get_page(self, url):
        """Return a deferred firing with the content of *url*.

        The request carries the fetcher's cookies and, when set, its
        referer.  Cookies are reset before the request when the
        HLS_RESET_COOKIES environment variable is defined.
        """
        def got_page(content):
            logging.debug("Cookies: %r" % self._cookies)
            return content
        url = url.encode("utf-8")
        if 'HLS_RESET_COOKIES' in os.environ:
            self._cookies = {}
        headers = {}
        if self.referer:
            headers['Referer'] = self.referer
        d = client.getPage(url, cookies=self._cookies, headers=headers)
        d.addCallback(got_page)
        return d

    def _download_page(self, url, path):
        """Return a deferred firing with the raw content of *url*.

        *path* is accepted for interface compatibility but is not used:
        the caller is responsible for storing the content.
        """
        # client.downloadPage does not support cookies!
        def _check(x):
            logging.debug("Received segment of %r bytes." % len(x))
            return x

        d = self._get_page(url)
        d.addCallback(_check)
        return d

    def _download_segment(self, f):
        """Download the playlist file entry *f*.

        When segments are kept, the returned deferred fires with the
        ``(path, url, f)`` tuple produced by _got_file() once the segment
        has been written to disk.  If the download or the write fails,
        _got_file() is skipped, _got_file_failed() is invoked and the
        deferred then fires with None.

        When ``n_segments_keep`` is 0 nothing is written and the deferred
        fires with ``(None, path, f)``.
        """
        url = HLS.make_url(self._file_playlist.url, f['file'])
        name = urlparse.urlparse(f['file']).path.split('/')[-1]
        path = os.path.join(self.path, name)
        d = self._download_page(url, path)
        if self.n_segments_keep != 0:
            def _save(content):
                # segments are binary data; the file is only created once
                # the content has actually been received
                with open(path, 'wb') as segment:
                    segment.write(content)
                return path

            d.addCallback(_save)
            d.addCallback(self._got_file, url, f)
            # registered last so that a failure bypasses _got_file()
            d.addErrback(self._got_file_failed)
        else:
            d.addCallback(lambda _: (None, path, f))
        return d

    def delete_cache(self, f):
        """Remove every cached segment whose sequence number satisfies *f*.

        *f* is a predicate called with each cached sequence number; the
        matching files are deleted from disk and dropped from the cache.
        """
        # snapshot the matching keys: the dict is modified while looping
        expired = [seq for seq in list(self._cached_files) if f(seq)]
        for seq in expired:
            filename = self._cached_files[seq]
            logging.debug("Removing %r" % filename)
            os.remove(filename)
            del self._cached_files[seq]

    def _got_file_failed(self, e):
        """Log a segment failure and forward it to the pending waiter."""
        logging.error("Segment download failed: %r" % (e,))
        if self._new_filed:
            self._new_filed.errback(e)
            self._new_filed = None

    def _got_file(self, path, url, f):
        """Register a saved segment, evict old ones and wake the waiter.

        Returns the ``(path, url, f)`` tuple.
        """
        logging.debug("Saved " + url + " in " + path)
        self._cached_files[f['sequence']] = path
        if self.n_segments_keep != -1:
            self.delete_cache(lambda x: x <= f['sequence'] - self.n_segments_keep)
        if self._new_filed:
            self._new_filed.callback((path, url, f))
            self._new_filed = None
        return (path, url, f)

    def _get_next_file(self, last_file=None):
        """Schedule the download of the next playlist file.

        Returns a deferred for the download, or a deferred waiting for the
        playlist to list new files when the iterator has nothing to offer
        and the playlist is not ended.  StopIteration raised by the file
        iterator propagates to the caller.
        """
        segment = next(self._files)
        if segment:
            delay = 0
            if last_file:
                previous = last_file['sequence']
                # only slow down once the two preceding segments are
                # cached; until then download as fast as possible
                if (previous - 1) in self._cached_files and \
                        (previous - 2) in self._cached_files:
                    delay = 1 # last_file['duration'] doesn't work
                              # when duration is not in sync with
                              # player, which can happen easily...
            return deferLater(reactor, delay, self._download_segment, segment)
        elif not self._file_playlist.endlist():
            self._file_playlisted = defer.Deferred()
            self._file_playlisted.addCallback(lambda x: self._get_next_file(last_file))
            return self._file_playlisted
        # NOTE(review): nothing is returned when the iterator yields a false
        # value on an ended playlist -- confirm this cannot happen with
        # M3U8.iter_files().

    def _handle_end(self, failure):
        """Stop the reactor when the file iterator is exhausted.

        Failures other than StopIteration are re-raised by trap().
        """
        failure.trap(StopIteration)
        print("End of media")
        reactor.stop()

    def _get_files_loop(self, last_file=None):
        """Download playlist files one after the other.

        *last_file* is the result of the previous download (a 3-tuple
        whose last item is the playlist file entry) or a false value.
        """
        if last_file:
            (_path, _url, f) = last_file
        else:
            f = None
        d = self._get_next_file(f)
        # and loop
        d.addCallback(self._get_files_loop)
        d.addErrback(self._handle_end)

    def _playlist_updated(self, pl):
        """Handle a freshly parsed playlist *pl*.

        A program playlist is saved and the selected program's playlist is
        loaded in turn; a file playlist is saved, scheduled for reload if
        it is not ended, and any deferred waiting for new files is fired.

        Raises ValueError when the playlist holds neither programs nor
        files.
        """
        if pl.has_programs():
            # if we got a program playlist, save it and start a program
            self._program_playlist = pl
            (program_url, _) = pl.get_program_playlist(self.program, self.bitrate)
            l = HLS.make_url(self.url, program_url)
            return self._reload_playlist(M3U8(l))
        elif pl.has_files():
            # we got sequence playlist, start reloading it regularly, and get files
            self._file_playlist = pl
            if not self._files:
                self._files = pl.iter_files()
            if not pl.endlist():
                # FIXME: reload delay - previous request time
                reactor.callLater(pl.reload_delay(), self._reload_playlist, pl)
            if self._file_playlisted:
                self._file_playlisted.callback(pl)
                self._file_playlisted = None
        else:
            raise ValueError("playlist %r has neither programs nor files"
                             % (getattr(pl, 'url', None),))
        return pl

    def _got_playlist_content(self, content, pl):
        """Feed *content* to the playlist *pl*.

        Returns *pl* on success, otherwise a deferred that retries the
        fetch after the playlist's reload delay.
        """
        if not pl.update(content):
            # if the playlist cannot be loaded, start a reload timer
            d = deferLater(reactor, pl.reload_delay(), self._fetch_playlist, pl)
            d.addCallback(self._got_playlist_content, pl)
            return d
        return pl

    def _fetch_playlist(self, pl):
        """Return a deferred firing with the raw content of playlist *pl*."""
        logging.debug('fetching %r' % pl.url)
        d = self._get_page(pl.url)
        return d

    def _reload_playlist(self, pl):
        """Fetch, parse and process the playlist *pl*."""
        d = self._fetch_playlist(pl)
        d.addCallback(self._got_playlist_content, pl)
        d.addCallback(self._playlist_updated)
        return d

    def get_file(self, sequence):
        """Return a deferred firing with the path of a cached segment.

        The segment with the lowest cached sequence number that is
        ``>= sequence`` is selected.  When none is cached yet, the deferred
        waits for the next saved segment and then looks again.
        """
        d = defer.Deferred()
        available = sorted(self._cached_files)
        candidates = [seq for seq in available if seq >= sequence]
        if candidates:
            d.callback(self._cached_files[candidates[0]])
        else:
            d.addCallback(lambda x: self.get_file(sequence))
            self._new_filed = d
            logging.debug('waiting for %r (available: %r)' % (sequence, available))
        return d

    def _start_get_files(self, x):
        """Start the download loop; return a deferred for the first file."""
        self._new_filed = defer.Deferred()
        self._get_files_loop()
        return self._new_filed

    def start(self):
        """Load the playlist and start fetching its files.

        Returns a deferred firing once the first segment has been saved.
        """
        self._files = None
        d = self._reload_playlist(M3U8(self.url))
        d.addCallback(self._start_get_files)
        return d

    def stop(self):
        """Placeholder: nothing is stopped at the moment."""
        pass
223