1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# vim: set fileencoding=utf-8 tabstop=4 shiftwidth=4 expandtab smartindent:
4
u"""
5
Crée un flux RSS avec les interventions de Jean-Marc Jancovici sur France
6
Info.
7
8
Les fichiers sonores sont placés dans des balises ``<enclosure/>`` pour faciliter le podcasting.
9
10
:Authors:
11
    Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org>
12
13
:License:
14
    GNU GPL v3 or later
15
"""
16
17
import os
18
import sys
19
import urllib2
20
import urlparse
21
import re
22
import BeautifulSoup
23
from pprint import pprint
24
25
# Address of the RSS feed that this script downloads and rewrites.
list_url = "http://www.france-info.com/rss/Le_regard_de_Jean-Marc_Jancovici.xml"
# Site root against which the MP3 paths found on the item pages are resolved.
base_url = "http://www.france-info.com"

# Matches the JavaScript call stored in the "onclick" attribute of the play
# link; group 1 captures the path of the MP3 file.  Written as a raw string
# so the backslashes reach the regex engine without relying on Python
# leaving unknown escape sequences such as "\(" untouched.
js_re = re.compile(r"jstoflash\('play','.*','','','([^']*\.mp3)'\);")
29
30
# Download the RSS feed, closing the HTTP response explicitly instead of
# leaving that to the garbage collector.
list_response = urllib2.urlopen(list_url)
try:
    list_page = list_response.read()
finally:
    list_response.close()
list_content = BeautifulSoup.BeautifulStoneSoup(list_page)

# Move every child tag of the first <atom:link> back into <channel>, then
# remove the first <atom:link> tag from the document.
# NOTE(review): presumably the parser nests the following elements inside
# <atom:link> because it does not treat that tag as self-closing -- confirm.
# A feed without any <atom:link> needs no fix-up, so it is left alone
# rather than failing with an AttributeError on None.
atom_link = list_content.find("atom:link")
if atom_link is not None:
    for child in atom_link.findAll(recursive=False):
        list_content.channel.insert(-1, child)
    list_content.find("atom:link").extract()
35
# For every feed item: fetch the item's web page, extract the MP3 address
# from the play link's "onclick" handler and rewrite the item's <enclosure/>
# (url, type, length) so that it describes the audio file.
fallback_type = "audio/mpeg"
for item in list_content.channel.findAll("item"):
    link = item.link.string
    enclosure = item.enclosure
    if enclosure is None:
        # Nothing to rewrite; skip the item instead of failing on None.
        sys.stderr.write("No enclosure in item %s, skipping\n" % link)
        continue

    item_response = urllib2.urlopen(link)
    try:
        item_page = item_response.read()
    finally:
        item_response.close()
    item_content = BeautifulSoup.BeautifulSoup(item_page)

    # The play link, its "onclick" attribute or the expected JavaScript call
    # may be absent from the page; in that case leave the item untouched.
    js_mo = None
    play_link = item_content.find("a", {"class": "p_ficheEcouter"})
    if play_link is not None:
        js_link = play_link.get("onclick")
        if js_link:
            js_mo = js_re.search(js_link)
    if js_mo is None:
        sys.stderr.write("No MP3 link found on %s, skipping\n" % link)
        continue

    podcast_url = urlparse.urljoin(base_url, js_mo.group(1))
    enclosure["url"] = podcast_url

    # Only the response headers are needed to fill in type and length.
    try:
        podcast_response = urllib2.urlopen(podcast_url)
    except urllib2.URLError as e:
        sys.stderr.write("Error getting podcast info from %s: %s\n" %
                         (podcast_url, e))
        enclosure["type"] = fallback_type # reasonable fallback
        del enclosure["length"]
        continue
    try:
        podcast_info = podcast_response.info()
    finally:
        podcast_response.close()

    # getheader() gives None for a header the server did not send; use the
    # same fallbacks as the error branch rather than storing None.
    content_type = podcast_info.getheader("Content-Type")
    content_length = podcast_info.getheader("Content-Length")
    enclosure["type"] = content_type or fallback_type
    if content_length is None:
        del enclosure["length"]
    else:
        enclosure["length"] = content_length
58
59
# Write the rewritten feed, pretty-printed, to standard output.
feed_xml = list_content.prettify()
print(feed_xml)