update to new release

epel9
Till Maas 14 years ago
parent 817631c343
commit af7dea07c6

@ -4,9 +4,14 @@
# Author: Danny Colligan # Author: Danny Colligan
# Author: Benjamin Johnson # Author: Benjamin Johnson
# Author: Vasyl' Vavrychuk # Author: Vasyl' Vavrychuk
# Author: Witold Baryluk
# Author: Paweł Paprota
# License: Public domain code # License: Public domain code
import cookielib import cookielib
import ctypes
import datetime import datetime
import email.utils
import gzip
import htmlentitydefs import htmlentitydefs
import httplib import httplib
import locale import locale
@ -17,11 +22,13 @@ import os.path
import re import re
import socket import socket
import string import string
import StringIO
import subprocess import subprocess
import sys import sys
import time import time
import urllib import urllib
import urllib2 import urllib2
import zlib
# parse_qs was moved from the cgi module to the urlparse module recently. # parse_qs was moved from the cgi module to the urlparse module recently.
try: try:
@ -30,9 +37,10 @@ except ImportError:
from cgi import parse_qs from cgi import parse_qs
std_headers = { std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-us,en;q=0.5', 'Accept-Language': 'en-us,en;q=0.5',
} }
@ -111,6 +119,14 @@ def sanitize_open(filename, open_mode):
stream = open(filename, open_mode) stream = open(filename, open_mode)
return (stream, filename) return (stream, filename)
def timeconvert(timestr):
    """Translate an RFC 2822 date string into a system timestamp.

    Returns the timestamp produced by email.utils.mktime_tz(), or None
    when email.utils.parsedate_tz() cannot parse the string.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
class DownloadError(Exception): class DownloadError(Exception):
"""Download Error exception. """Download Error exception.
@ -159,6 +175,64 @@ class ContentTooShortError(Exception):
self.downloaded = downloaded self.downloaded = downloaded
self.expected = expected self.expected = expected
class YoutubeDLHandler(urllib2.HTTPHandler):
    """HTTP handler that adds the standard headers and decodes responses.

    Once installed in an OpenerDirector, this handler sets every header
    from std_headers on each outgoing HTTP request and transparently
    unpacks gzipped and deflated response bodies.

    A caller that wants an uncompressed transfer only has to add the
    "Youtubedl-No-Compression" header to its request: the handler then
    drops both that marker and the Accept-Encoding header before the real
    request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflated data, raw stream first, zlib-wrapped second."""
        try:
            # Negative wbits: treat the input as a raw deflate stream.
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the given status code."""
        if not hasattr(urllib2.addinfourl, 'getcode'):
            # No getcode(): presumably an older urllib2 whose constructor
            # takes no status code, so attach it by hand.
            wrapped = urllib2.addinfourl(stream, headers, url)
            wrapped.code = code
            return wrapped
        return urllib2.addinfourl(stream, headers, url, code)

    def http_request(self, req):
        """Force the standard headers onto req and honour the opt-out marker."""
        for name, value in std_headers.items():
            if name in req.headers:
                del req.headers[name]
            req.add_header(name, value)
        if 'Youtubedl-no-compression' in req.headers:
            # The marker is internal: remove it together with the
            # Accept-Encoding header so it never goes over the wire.
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        """Return resp, re-wrapped around decoded data if it was compressed."""
        original = resp
        encoding = resp.headers.get('Content-encoding', '')
        if encoding == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
        elif encoding == 'deflate':
            body = StringIO.StringIO(self.deflate(resp.read()))
        else:
            return resp
        decoded = self.addinfourl_wrapper(body, original.headers, original.url, original.code)
        decoded.msg = original.msg
        return decoded
class FileDownloader(object): class FileDownloader(object):
"""File Downloader class. """File Downloader class.
@ -194,6 +268,7 @@ class FileDownloader(object):
forcetitle: Force printing title. forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL. forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description. forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files. simulate: Do not download the video files.
format: Video format code. format: Video format code.
format_limit: Highest quality format to try. format_limit: Highest quality format to try.
@ -207,6 +282,9 @@ class FileDownloader(object):
playliststart: Playlist item to start at. playliststart: Playlist item to start at.
playlistend: Playlist item to end at. playlistend: Playlist item to end at.
logtostderr: Log messages to stderr instead of stdout. logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
""" """
params = None params = None
@ -235,13 +313,6 @@ class FileDownloader(object):
if not os.path.exists(dir): if not os.path.exists(dir):
os.mkdir(dir) os.mkdir(dir)
@staticmethod
def temp_name(filename):
"""Returns a temporary filename for the given filename."""
if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)):
return filename
return filename + u'.part'
@staticmethod @staticmethod
def format_bytes(bytes): def format_bytes(bytes):
if bytes is None: if bytes is None:
@ -331,6 +402,17 @@ class FileDownloader(object):
"""Print message to stderr.""" """Print message to stderr."""
print >>sys.stderr, message.encode(preferredencoding()) print >>sys.stderr, message.encode(preferredencoding())
def to_cons_title(self, message):
    """Show message as the title of the console/terminal window.

    Nothing happens unless the 'consoletitle' parameter is set.
    """
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # Terminal escape sequence written to stderr to set the title.
            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
def fixed_template(self): def fixed_template(self):
"""Checks if the output template is fixed.""" """Checks if the output template is fixed."""
return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
@ -361,6 +443,18 @@ class FileDownloader(object):
if speed > rate_limit: if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
    """Return the name of the temporary file to use for filename.

    The name is returned unchanged when the 'nopart' parameter is set,
    when it is u'-', or when it exists but is not a regular file.
    Otherwise u'.part' is appended.
    """
    if self.params.get('nopart', False):
        return filename
    if filename == u'-':
        return filename
    if os.path.exists(filename) and not os.path.isfile(filename):
        return filename
    return filename + u'.part'
def undo_temp_name(self, filename):
    """Return filename with a trailing u'.part' suffix removed, if present."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
def try_rename(self, old_filename, new_filename): def try_rename(self, old_filename, new_filename):
try: try:
if old_filename == new_filename: if old_filename == new_filename:
@ -369,6 +463,23 @@ class FileDownloader(object):
except (IOError, OSError), err: except (IOError, OSError), err:
self.trouble(u'ERROR: unable to rename file') self.trouble(u'ERROR: unable to rename file')
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    filename is the path of the file to touch. last_modified_hdr is the
    value of a Last-modified HTTP header, or None when there is none.

    This is best effort and always returns None. Nothing is done when the
    header is missing, when filename is not a regular file, or when
    timeconvert() cannot turn the header into a timestamp. A failure to
    apply the timestamp is ignored.
    """
    if last_modified_hdr is None:
        return
    if not os.path.isfile(filename):
        return
    filetime = timeconvert(last_modified_hdr)
    if filetime is None:
        return
    try:
        # Access time becomes "now"; modification time comes from the header.
        os.utime(filename, (time.time(), filetime))
    except (OSError, IOError, OverflowError, ValueError):
        # Best effort only. Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
def report_destination(self, filename): def report_destination(self, filename):
"""Report destination filename.""" """Report destination filename."""
self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
@ -379,6 +490,8 @@ class FileDownloader(object):
return return
self.to_screen(u'\r[download] %s of %s at %s ETA %s' % self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True) (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len): def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte.""" """Report attempt to resume at given byte."""
@ -410,8 +523,21 @@ class FileDownloader(object):
"""Increment the ordinal that assigns a number to each file.""" """Increment the ordinal that assigns a number to each file."""
self._num_downloads += 1 self._num_downloads += 1
def prepare_filename(self, info_dict):
    """Build the output filename from the 'outtmpl' template.

    The template is filled from a copy of info_dict extended with two
    extra fields: 'epoch' (the current time in whole seconds) and
    'autonumber' (the download counter, zero-padded to five digits).

    Returns the filename. On ValueError or KeyError the problem is
    reported through self.trouble() and None is returned.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = unicode(long(time.time()))
        fields['autonumber'] = unicode('%05d' % self._num_downloads)
        return self.params['outtmpl'] % fields
    except (ValueError, KeyError):
        self.trouble(u'ERROR: invalid system charset or erroneous output template')
    return None
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor.""" """Process a single dictionary returned by an InfoExtractor."""
filename = self.prepare_filename(info_dict)
# Do nothing else if in simulate mode # Do nothing else if in simulate mode
if self.params.get('simulate', False): if self.params.get('simulate', False):
# Forced printings # Forced printings
@ -423,16 +549,12 @@ class FileDownloader(object):
print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
if self.params.get('forcedescription', False) and 'description' in info_dict: if self.params.get('forcedescription', False) and 'description' in info_dict:
print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
if self.params.get('forcefilename', False) and filename is not None:
print filename.encode(preferredencoding(), 'xmlcharrefreplace')
return return
try: if filename is None:
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
filename = self.params['outtmpl'] % template_dict
except (ValueError, KeyError), err:
self.trouble(u'ERROR: invalid system charset or erroneous output template')
return return
if self.params.get('nooverwrites', False) and os.path.exists(filename): if self.params.get('nooverwrites', False) and os.path.exists(filename):
self.to_stderr(u'WARNING: file exists and will be skipped') self.to_stderr(u'WARNING: file exists and will be skipped')
@ -531,7 +653,7 @@ class FileDownloader(object):
def _do_download(self, filename, url, player_url): def _do_download(self, filename, url, player_url):
# Check file already present # Check file already present
if self.params.get('continuedl', False) and os.path.isfile(filename): if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename) self.report_file_already_downloaded(filename)
return True return True
@ -542,8 +664,11 @@ class FileDownloader(object):
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
stream = None stream = None
open_mode = 'wb' open_mode = 'wb'
basic_request = urllib2.Request(url, None, std_headers)
request = urllib2.Request(url, None, std_headers) # Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
basic_request = urllib2.Request(url, None, headers)
request = urllib2.Request(url, None, headers)
# Establish possible resume length # Establish possible resume length
if os.path.isfile(tmpfilename): if os.path.isfile(tmpfilename):
@ -606,8 +731,10 @@ class FileDownloader(object):
return False return False
data_len = data.info().get('Content-length', None) data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = long(data_len) + resume_len
data_len_str = self.format_bytes(data_len) data_len_str = self.format_bytes(data_len)
byte_counter = 0 byte_counter = 0 + resume_len
block_size = 1024 block_size = 1024
start = time.time() start = time.time()
while True: while True:
@ -615,15 +742,15 @@ class FileDownloader(object):
before = time.time() before = time.time()
data_block = data.read(block_size) data_block = data.read(block_size)
after = time.time() after = time.time()
data_block_len = len(data_block) if len(data_block) == 0:
if data_block_len == 0:
break break
byte_counter += data_block_len byte_counter += len(data_block)
# Open file just in time # Open file just in time
if stream is None: if stream is None:
try: try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename) self.report_destination(filename)
except (OSError, IOError), err: except (OSError, IOError), err:
self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
@ -633,22 +760,27 @@ class FileDownloader(object):
except (IOError, OSError), err: except (IOError, OSError), err:
self.trouble(u'\nERROR: unable to write data: %s' % str(err)) self.trouble(u'\nERROR: unable to write data: %s' % str(err))
return False return False
block_size = self.best_block_size(after - before, data_block_len) block_size = self.best_block_size(after - before, len(data_block))
# Progress message # Progress message
percent_str = self.calc_percent(byte_counter, data_len) percent_str = self.calc_percent(byte_counter, data_len)
eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
speed_str = self.calc_speed(start, time.time(), byte_counter) speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str) self.report_progress(percent_str, data_len_str, speed_str, eta_str)
# Apply rate limit # Apply rate limit
self.slow_down(start, byte_counter) self.slow_down(start, byte_counter - resume_len)
stream.close() stream.close()
self.report_finish() self.report_finish()
if data_len is not None and str(byte_counter) != data_len: if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, long(data_len)) raise ContentTooShortError(byte_counter, long(data_len))
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
self.try_utime(filename, data.info().get('last-modified', None))
return True return True
class InfoExtractor(object): class InfoExtractor(object):
@ -725,7 +857,7 @@ class InfoExtractor(object):
class YoutubeIE(InfoExtractor): class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com.""" """Information extractor for youtube.com."""
_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@ -804,7 +936,7 @@ class YoutubeIE(InfoExtractor):
return return
# Set language # Set language
request = urllib2.Request(self._LANG_URL, None, std_headers) request = urllib2.Request(self._LANG_URL)
try: try:
self.report_lang() self.report_lang()
urllib2.urlopen(request).read() urllib2.urlopen(request).read()
@ -824,7 +956,7 @@ class YoutubeIE(InfoExtractor):
'username': username, 'username': username,
'password': password, 'password': password,
} }
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
try: try:
self.report_login() self.report_login()
login_results = urllib2.urlopen(request).read() login_results = urllib2.urlopen(request).read()
@ -840,7 +972,7 @@ class YoutubeIE(InfoExtractor):
'next_url': '/', 'next_url': '/',
'action_confirm': 'Confirm', 'action_confirm': 'Confirm',
} }
request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
try: try:
self.report_age_confirmation() self.report_age_confirmation()
age_results = urllib2.urlopen(request).read() age_results = urllib2.urlopen(request).read()
@ -858,7 +990,7 @@ class YoutubeIE(InfoExtractor):
# Get video webpage # Get video webpage
self.report_video_webpage_download(video_id) self.report_video_webpage_download(video_id)
request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers) request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
try: try:
video_webpage = urllib2.urlopen(request).read() video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -877,7 +1009,7 @@ class YoutubeIE(InfoExtractor):
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type)) % (video_id, el_type))
request = urllib2.Request(video_info_url, None, std_headers) request = urllib2.Request(video_info_url)
try: try:
video_info_webpage = urllib2.urlopen(request).read() video_info_webpage = urllib2.urlopen(request).read()
video_info = parse_qs(video_info_webpage) video_info = parse_qs(video_info_webpage)
@ -945,7 +1077,6 @@ class YoutubeIE(InfoExtractor):
# Decide which formats to download # Decide which formats to download
req_format = self._downloader.params.get('format', None) req_format = self._downloader.params.get('format', None)
get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
if 'fmt_url_map' in video_info: if 'fmt_url_map' in video_info:
url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
@ -963,10 +1094,11 @@ class YoutubeIE(InfoExtractor):
elif req_format == '-1': elif req_format == '-1':
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else: else:
if req_format in url_map: # Specific format
video_url_list = [(req_format, url_map[req_format])] # Specific format if req_format not in url_map:
else: self._downloader.trouble(u'ERROR: requested format not available')
video_url_list = [(req_format, get_video_template % req_format)] # Specific format return
video_url_list = [(req_format, url_map[req_format])] # Specific format
elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download() self.report_rtmp_download()
@ -1000,7 +1132,7 @@ class YoutubeIE(InfoExtractor):
'player_url': player_url, 'player_url': player_url,
}) })
except UnavailableVideoError, err: except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') self._downloader.trouble(u'\nERROR: unable to download video')
class MetacafeIE(InfoExtractor): class MetacafeIE(InfoExtractor):
@ -1037,7 +1169,7 @@ class MetacafeIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
# Retrieve disclaimer # Retrieve disclaimer
request = urllib2.Request(self._DISCLAIMER, None, std_headers) request = urllib2.Request(self._DISCLAIMER)
try: try:
self.report_disclaimer() self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read() disclaimer = urllib2.urlopen(request).read()
@ -1050,7 +1182,7 @@ class MetacafeIE(InfoExtractor):
'filters': '0', 'filters': '0',
'submit': "Continue - I'm over 18", 'submit': "Continue - I'm over 18",
} }
request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
try: try:
self.report_age_confirmation() self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read() disclaimer = urllib2.urlopen(request).read()
@ -1145,7 +1277,7 @@ class MetacafeIE(InfoExtractor):
'player_url': None, 'player_url': None,
}) })
except UnavailableVideoError: except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video') self._downloader.trouble(u'\nERROR: unable to download video')
class DailymotionIE(InfoExtractor): class DailymotionIE(InfoExtractor):
@ -1214,7 +1346,7 @@ class DailymotionIE(InfoExtractor):
video_title = mobj.group(1).decode('utf-8') video_title = mobj.group(1).decode('utf-8')
video_title = sanitize_title(video_title) video_title = sanitize_title(video_title)
mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage) mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
if mobj is None: if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname') self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return return
@ -1234,7 +1366,7 @@ class DailymotionIE(InfoExtractor):
'player_url': None, 'player_url': None,
}) })
except UnavailableVideoError: except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video') self._downloader.trouble(u'\nERROR: unable to download video')
class GoogleIE(InfoExtractor): class GoogleIE(InfoExtractor):
"""Information extractor for video.google.com.""" """Information extractor for video.google.com."""
@ -1344,7 +1476,7 @@ class GoogleIE(InfoExtractor):
'player_url': None, 'player_url': None,
}) })
except UnavailableVideoError: except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video') self._downloader.trouble(u'\nERROR: unable to download video')
class PhotobucketIE(InfoExtractor): class PhotobucketIE(InfoExtractor):
@ -1426,7 +1558,7 @@ class PhotobucketIE(InfoExtractor):
'player_url': None, 'player_url': None,
}) })
except UnavailableVideoError: except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video') self._downloader.trouble(u'\nERROR: unable to download video')
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
@ -1584,7 +1716,7 @@ class YahooIE(InfoExtractor):
'player_url': None, 'player_url': None,
}) })
except UnavailableVideoError: except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video') self._downloader.trouble(u'\nERROR: unable to download video')
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1685,7 +1817,7 @@ class GenericIE(InfoExtractor):
'player_url': None, 'player_url': None,
}) })
except UnavailableVideoError, err: except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download video') self._downloader.trouble(u'\nERROR: unable to download video')
class YoutubeSearchIE(InfoExtractor): class YoutubeSearchIE(InfoExtractor):
@ -1753,7 +1885,7 @@ class YoutubeSearchIE(InfoExtractor):
while True: while True:
self.report_download_page(query, pagenum) self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
request = urllib2.Request(result_url, None, std_headers) request = urllib2.Request(result_url)
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -1844,7 +1976,7 @@ class GoogleSearchIE(InfoExtractor):
while True: while True:
self.report_download_page(query, pagenum) self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
request = urllib2.Request(result_url, None, std_headers) request = urllib2.Request(result_url)
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -1935,7 +2067,7 @@ class YahooSearchIE(InfoExtractor):
while True: while True:
self.report_download_page(query, pagenum) self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
request = urllib2.Request(result_url, None, std_headers) request = urllib2.Request(result_url)
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -1964,7 +2096,7 @@ class YahooSearchIE(InfoExtractor):
class YoutubePlaylistIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists.""" """Information Extractor for YouTube playlists."""
_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@ -1999,7 +2131,7 @@ class YoutubePlaylistIE(InfoExtractor):
while True: while True:
self.report_download_page(playlist_id, pagenum) self.report_download_page(playlist_id, pagenum)
request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -2028,9 +2160,11 @@ class YoutubePlaylistIE(InfoExtractor):
class YoutubeUserIE(InfoExtractor): class YoutubeUserIE(InfoExtractor):
"""Information Extractor for YouTube users.""" """Information Extractor for YouTube users."""
_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
_VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. _GDATA_PAGE_SIZE = 50
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
_youtube_ie = None _youtube_ie = None
def __init__(self, youtube_ie, downloader=None): def __init__(self, youtube_ie, downloader=None):
@ -2041,9 +2175,10 @@ class YoutubeUserIE(InfoExtractor):
def suitable(url): def suitable(url):
return (re.match(YoutubeUserIE._VALID_URL, url) is not None) return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
def report_download_page(self, username): def report_download_page(self, username, start_index):
"""Report attempt to download user page.""" """Report attempt to download user page."""
self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username)) self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
(username, start_index, start_index + self._GDATA_PAGE_SIZE))
def _real_initialize(self): def _real_initialize(self):
self._youtube_ie.initialize() self._youtube_ie.initialize()
@ -2055,34 +2190,63 @@ class YoutubeUserIE(InfoExtractor):
self._downloader.trouble(u'ERROR: invalid url: %s' % url) self._downloader.trouble(u'ERROR: invalid url: %s' % url)
return return
# Download user page
username = mobj.group(1) username = mobj.group(1)
# Download video ids using YouTube Data API. Result size per
# query is limited (currently to 50 videos) so we need to query
# page by page until there are no video ids - it means we got
# all of them.
video_ids = [] video_ids = []
pagenum = 1 pagenum = 0
self.report_download_page(username) while True:
request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) start_index = pagenum * self._GDATA_PAGE_SIZE + 1
try: self.report_download_page(username, start_index)
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
return try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
return
# Extract video identifiers
ids_in_page = []
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(mobj.group(1))
# Extract video identifiers video_ids.extend(ids_in_page)
ids_in_page = []
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
# we can assume that this page is the last one - there
# are no more ids on further pages - no need to query
# again.
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break
for mobj in re.finditer(self._VIDEO_INDICATOR, page): pagenum += 1
if mobj.group(1) not in ids_in_page:
ids_in_page.append(mobj.group(1))
video_ids.extend(ids_in_page)
all_ids_count = len(video_ids)
playliststart = self._downloader.params.get('playliststart', 1) - 1 playliststart = self._downloader.params.get('playliststart', 1) - 1
playlistend = self._downloader.params.get('playlistend', -1) playlistend = self._downloader.params.get('playlistend', -1)
video_ids = video_ids[playliststart:playlistend]
for id in video_ids: if playlistend == -1:
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) video_ids = video_ids[playliststart:]
return else:
video_ids = video_ids[playliststart:playlistend]
self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
(username, all_ids_count, len(video_ids)))
for video_id in video_ids:
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
class DepositFilesIE(InfoExtractor): class DepositFilesIE(InfoExtractor):
"""Information extractor for depositfiles.com""" """Information extractor for depositfiles.com"""
@ -2117,7 +2281,7 @@ class DepositFilesIE(InfoExtractor):
# Retrieve file webpage with 'Free download' button pressed # Retrieve file webpage with 'Free download' button pressed
free_download_indication = { 'gateway_result' : '1' } free_download_indication = { 'gateway_result' : '1' }
request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) request = urllib2.Request(url, urllib.urlencode(free_download_indication))
try: try:
self.report_download_webpage(file_id) self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
@ -2216,26 +2380,32 @@ if __name__ == '__main__':
import getpass import getpass
import optparse import optparse
# Function to update the program file with the latest version from bitbucket.org # Function to update the program file with the latest version from the repository.
def update_self(downloader, filename): def update_self(downloader, filename):
# Note: downloader only used for options # Note: downloader only used for options
if not os.access (filename, os.W_OK): if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename) sys.exit('ERROR: no write permissions on %s' % filename)
downloader.to_screen('Updating to latest stable version...') downloader.to_screen('Updating to latest stable version...')
latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' try:
latest_version = urllib.urlopen(latest_url).read().strip() latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version latest_version = urllib.urlopen(latest_url).read().strip()
newcontent = urllib.urlopen(prog_url).read() prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
stream = open(filename, 'w') newcontent = urllib.urlopen(prog_url).read()
stream.write(newcontent) except (IOError, OSError), err:
stream.close() sys.exit('ERROR: unable to download latest version')
try:
stream = open(filename, 'w')
stream.write(newcontent)
stream.close()
except (IOError, OSError), err:
sys.exit('ERROR: unable to overwrite current version')
downloader.to_screen('Updated to version %s' % latest_version) downloader.to_screen('Updated to version %s' % latest_version)
# Parse command line # Parse command line
parser = optparse.OptionParser( parser = optparse.OptionParser(
usage='Usage: %prog [options] url...', usage='Usage: %prog [options] url...',
version='2010.12.09', version='2011.01.30',
conflict_handler='resolve', conflict_handler='resolve',
) )
@ -2255,6 +2425,9 @@ if __name__ == '__main__':
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
parser.add_option('--playlist-end', parser.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
parser.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication = optparse.OptionGroup(parser, 'Authentication Options')
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
@ -2268,14 +2441,10 @@ if __name__ == '__main__':
video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option('-f', '--format', video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', help='video format code') action='store', dest='format', metavar='FORMAT', help='video format code')
video_format.add_option('-m', '--mobile-version',
action='store_const', dest='format', help='alias for -f 17', const='17')
video_format.add_option('--all-formats', video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='-1') action='store_const', dest='format', help='download all available video formats', const='-1')
video_format.add_option('--max-quality', video_format.add_option('--max-quality',
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
video_format.add_option('-b', '--best-quality',
action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
parser.add_option_group(video_format) parser.add_option_group(video_format)
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@ -2288,11 +2457,19 @@ if __name__ == '__main__':
verbosity.add_option('-e', '--get-title', verbosity.add_option('-e', '--get-title',
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
verbosity.add_option('--get-thumbnail', verbosity.add_option('--get-thumbnail',
action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) action='store_true', dest='getthumbnail',
help='simulate, quiet but print thumbnail URL', default=False)
verbosity.add_option('--get-description', verbosity.add_option('--get-description',
action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) action='store_true', dest='getdescription',
help='simulate, quiet but print video description', default=False)
verbosity.add_option('--get-filename',
action='store_true', dest='getfilename',
help='simulate, quiet but print output filename', default=False)
verbosity.add_option('--no-progress', verbosity.add_option('--no-progress',
action='store_true', dest='noprogress', help='do not print progress bar', default=False) action='store_true', dest='noprogress', help='do not print progress bar', default=False)
verbosity.add_option('--console-title',
action='store_true', dest='consoletitle',
help='display progress in console titlebar', default=False)
parser.add_option_group(verbosity) parser.add_option_group(verbosity)
filesystem = optparse.OptionGroup(parser, 'Filesystem Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@ -2301,7 +2478,8 @@ if __name__ == '__main__':
filesystem.add_option('-l', '--literal', filesystem.add_option('-l', '--literal',
action='store_true', dest='useliteral', help='use literal title in file name', default=False) action='store_true', dest='useliteral', help='use literal title in file name', default=False)
filesystem.add_option('-A', '--auto-number', filesystem.add_option('-A', '--auto-number',
action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) action='store_true', dest='autonumber',
help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output', filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template') dest='outtmpl', metavar='TEMPLATE', help='output filename template')
filesystem.add_option('-a', '--batch-file', filesystem.add_option('-a', '--batch-file',
@ -2312,6 +2490,11 @@ if __name__ == '__main__':
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
filesystem.add_option('--cookies', filesystem.add_option('--cookies',
dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
filesystem.add_option('--no-part',
action='store_true', dest='nopart', help='do not use .part files', default=False)
filesystem.add_option('--no-mtime',
action='store_false', dest='updatetime',
help='do not use the Last-modified header to set the file modification time', default=True)
parser.add_option_group(filesystem) parser.add_option_group(filesystem)
(opts, args) = parser.parse_args() (opts, args) = parser.parse_args()
@ -2327,10 +2510,14 @@ if __name__ == '__main__':
except (IOError, OSError), err: except (IOError, OSError), err:
sys.exit(u'ERROR: unable to open cookie file') sys.exit(u'ERROR: unable to open cookie file')
# Dump user agent
if opts.dump_user_agent:
print std_headers['User-Agent']
sys.exit(0)
# General configuration # General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar) cookie_processor = urllib2.HTTPCookieProcessor(jar)
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
urllib2.install_opener(urllib2.build_opener(cookie_processor))
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
# Batch file verification # Batch file verification
@ -2349,8 +2536,6 @@ if __name__ == '__main__':
all_urls = batchurls + args all_urls = batchurls + args
# Conflicting, missing and erroneous options # Conflicting, missing and erroneous options
if opts.bestquality:
print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
if opts.usenetrc and (opts.username is not None or opts.password is not None): if opts.usenetrc and (opts.username is not None or opts.password is not None):
parser.error(u'using .netrc conflicts with giving username/password') parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None: if opts.password is not None and opts.username is None:
@ -2404,12 +2589,13 @@ if __name__ == '__main__':
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
'password': opts.password, 'password': opts.password,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
'forceurl': opts.geturl, 'forceurl': opts.geturl,
'forcetitle': opts.gettitle, 'forcetitle': opts.gettitle,
'forcethumbnail': opts.getthumbnail, 'forcethumbnail': opts.getthumbnail,
'forcedescription': opts.getdescription, 'forcedescription': opts.getdescription,
'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'forcefilename': opts.getfilename,
'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
'format': opts.format, 'format': opts.format,
'format_limit': opts.format_limit, 'format_limit': opts.format_limit,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
@ -2431,6 +2617,9 @@ if __name__ == '__main__':
'playliststart': opts.playliststart, 'playliststart': opts.playliststart,
'playlistend': opts.playlistend, 'playlistend': opts.playlistend,
'logtostderr': opts.outtmpl == '-', 'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
'updatetime': opts.updatetime,
}) })
fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_search_ie)
fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(youtube_pl_ie)

@ -1,12 +1,12 @@
Name: youtube-dl Name: youtube-dl
Version: 2010.12.09 Version: 2011.01.30
Release: 1%{?dist} Release: 1%{?dist}
Summary: Small command-line program to download videos from YouTube Summary: Small command-line program to download videos from YouTube
Summary(pl): Tekstowy program do pobierania filmów z youtube.com Summary(pl): Tekstowy program do pobierania filmów z youtube.com
Group: Applications/Multimedia Group: Applications/Multimedia
License: Public Domain License: Public Domain
URL: http://rg3.github.com/youtube-dl/ URL: http://rg3.github.com/youtube-dl/
Source0: http://github.com/rg3/youtube-dl/raw/%{version}/youtube-dl Source0: https://github.com/rg3/youtube-dl/raw/%{version}/youtube-dl
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildArch: noarch BuildArch: noarch
Requires: python >= 2.4 Requires: python >= 2.4
@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT
%{_bindir}/%{name} %{_bindir}/%{name}
%changelog %changelog
* Mon Jan 31 2011 Till Maas <opensource@till.name> - 2011.01.30-1
- Update to latest release
* Sun Dec 12 2010 Till Maas <opensource@till.name> - 2010.12.09-1 * Sun Dec 12 2010 Till Maas <opensource@till.name> - 2010.12.09-1
- Update to latest release to adjust with youtube changes - Update to latest release to adjust with youtube changes

Loading…
Cancel
Save