From 992376b4d797d65c313112f4abfbf9f462cbe80f Mon Sep 17 00:00:00 2001 From: Krzysztof Kurzawski Date: Sat, 26 Jan 2008 11:33:26 +0000 Subject: [PATCH 001/279] import %{name} %{version} --- index.html | 165 +++++++++++++++++++++ youtube-dl | 387 ++++++++++++++++++++++++++++++++++++++++++++++++ youtube-dl.spec | 61 ++++++++ 3 files changed, 613 insertions(+) create mode 100644 index.html create mode 100644 youtube-dl create mode 100644 youtube-dl.spec diff --git a/index.html b/index.html new file mode 100644 index 0000000..9fd6cf0 --- /dev/null +++ b/index.html @@ -0,0 +1,165 @@ + + + + + youtube-dl: Download videos from YouTube.com + + + +

youtube-dl: Download videos from YouTube.com

+ +

What is it?

+ +

youtube-dl is a small command-line program to download videos +from YouTube.com. It requires the Python +interpreter, version 2.4 or later, and it's not platform specific. +It should work in your Unix box, in Windows or in Mac OS X. The latest version +is 2008.01.24. It's licensed under the MIT License, which +means you can modify it, redistribute it or use it however you like +complying with a few simple conditions.

+ +

I'll try to keep it updated if YouTube.com changes the way you access +their videos. After all, it's a simple and short program. However, I can't +guarantee anything. If you detect it stops working, check for new versions +and/or inform me about the problem, indicating the program version you +are using. If the program stops working and I can't solve the problem but +you have a solution, I'd like to know it. If that happens and you feel you +can maintain the program yourself, tell me. My contact information is +at freshmeat.net.

+ +

Thanks for all the feedback received so far. I'm glad people find my +program useful.

+ +

Related projects: +metacafe-dl +pornotube-dl +

+ +

Usage instructions

+ +

In Windows, once you have installed the Python interpreter, save the +program with the .py extension and put it somewhere in the PATH. +Try to follow the +guide to +install youtube-dl under Windows XP.

+ +

In Unix, download it, give it execution permission and copy it to one +of the PATH directories (typically, /usr/local/bin).

+ +

After that, you should be able to call it from the command line as +youtube-dl or youtube-dl.py. I will use youtube-dl +in the following examples. Usage instructions are easy. Use youtube-dl +followed by a video URL or identifier. Example: youtube-dl +"http://www.youtube.com/watch?v=foobar". The video will be saved +to the file foobar.flv in that example. As YouTube.com +videos are in Flash Video format, their extension should be flv. +In Linux and other unices, video players using a recent version of +ffmpeg can play them. That includes MPlayer, VLC, etc. Those two +work under Windows and other platforms, but you could also get a +specific FLV player of your taste.

+ +

If you try to run the program and you receive an error message containing the +keyword SyntaxError near the end, it means your Python interpreter +is too old.

+ +

More usage tips

+ + + +

Download it

+ +

Note that if you directly click on these hyperlinks, your web browser will +most likely display the program contents. It's usually better to +right-click on it and choose the appropriate option, normally called Save +Target As or Save Link As, depending on the web browser you +are using.

+ +

2008.01.24

+ + + + + diff --git a/youtube-dl b/youtube-dl new file mode 100644 index 0000000..1aa6123 --- /dev/null +++ b/youtube-dl @@ -0,0 +1,387 @@ +#!/usr/bin/env python +# +# Copyright (c) 2006-2008 Ricardo Garcia Gonzalez +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name(s) of the above copyright +# holders shall not be used in advertising or otherwise to promote the +# sale, use or other dealings in this Software without prior written +# authorization. 
+# +import getpass +import httplib +import math +import netrc +import optparse +import os +import re +import socket +import string +import sys +import time +import urllib2 + +# Global constants +const_1k = 1024 +const_initial_block_size = 10 * const_1k +const_epsilon = 0.0001 +const_timeout = 120 + +const_video_url_str = 'http://www.youtube.com/watch?v=%s' +const_video_url_re = re.compile(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$') +const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' +const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' +const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' +const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' +const_url_t_param_re = re.compile(r', "t": "([^"]+)"') +const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s' +const_video_title_re = re.compile(r'YouTube - ([^<]*)', re.M | re.I) + +# Print error message, followed by standard advice information, and then exit +def error_advice_exit(error_text): + sys.stderr.write('Error: %s.\n' % error_text) + sys.stderr.write('Try again several times. 
It may be a temporary problem.\n') + sys.stderr.write('Other typical problems:\n\n') + sys.stderr.write('* Video no longer exists.\n') + sys.stderr.write('* Video requires age confirmation but you did not provide an account.\n') + sys.stderr.write('* You provided the account data, but it is not valid.\n') + sys.stderr.write('* The connection was cut suddenly for some reason.\n') + sys.stderr.write('* YouTube changed their system, and the program no longer works.\n') + sys.stderr.write('\nTry to confirm you are able to view the video using a web browser.\n') + sys.stderr.write('Use the same video URL and account information, if needed, with this program.\n') + sys.stderr.write('When using a proxy, make sure http_proxy has http://host:port format.\n') + sys.stderr.write('Try again several times and contact me if the problem persists.\n') + sys.exit('\n') + +# Wrapper to create custom requests with typical headers +def request_create(url, data=None): + retval = urllib2.Request(url) + if data is not None: + retval.add_data(data) + # Try to mimic Firefox, at least a little bit + retval.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') + retval.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') + retval.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') + retval.add_header('Accept-Language', 'en-us,en;q=0.5') + return retval + +# Perform a request, process headers and return response +def perform_request(url, data=None): + request = request_create(url, data) + response = urllib2.urlopen(request) + return response + +# Conditional print +def cond_print(str): + global cmdl_opts + if not (cmdl_opts.quiet or cmdl_opts.get_url): + sys.stdout.write(str) + sys.stdout.flush() + +# Title string normalization +def title_string_norm(title): + title = ''.join((x in string.ascii_letters or x in string.digits) and x or ' ' for x in 
title) + title = '_'.join(title.split()) + title = title.lower() + return title + +# Generic download step +def download_step(return_data_flag, step_title, step_error, url, post_data=None): + try: + cond_print('%s... ' % step_title) + data = perform_request(url, post_data).read() + cond_print('done.\n') + if return_data_flag: + return data + return None + + except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error): + cond_print('failed.\n') + error_advice_exit(step_error) + + except KeyboardInterrupt: + sys.exit('\n') + +# Generic extract step +def extract_step(step_title, step_error, regexp, data): + try: + cond_print('%s... ' % step_title) + match = regexp.search(data) + + if match is None: + cond_print('failed.\n') + error_advice_exit(step_error) + + extracted_data = match.group(1) + cond_print('done.\n') + return extracted_data + + except KeyboardInterrupt: + sys.exit('\n') + +# Calculate new block size based on previous block size +def new_block_size(before, after, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = max(bytes * 2.0, 1.0) + dif = after - before + if dif < const_epsilon: + return int(new_max) + rate = bytes / dif + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + +# Get optimum 1k exponent to represent a number of bytes +def optimum_k_exp(num_bytes): + global const_1k + if num_bytes == 0: + return 0 + return long(math.log(num_bytes, const_1k)) + +# Get optimum representation of number of bytes +def format_bytes(num_bytes): + global const_1k + try: + exp = optimum_k_exp(num_bytes) + suffix = 'bkMGTPEZY'[exp] + if exp == 0: + return '%s%s' % (num_bytes, suffix) + converted = float(num_bytes) / float(const_1k**exp) + return '%.2f%s' % (converted, suffix) + except IndexError: + sys.exit('Error: internal error formatting number of bytes.') + +# Calculate ETA and return it in string format as MM:SS +def calc_eta(start, now, total, current): + dif = now - start + if 
current == 0 or dif < const_epsilon: + return '--:--' + rate = float(current) / dif + eta = long((total - current) / rate) + (eta_mins, eta_secs) = divmod(eta, 60) + if eta_mins > 99: + return '--:--' + return '%02d:%02d' % (eta_mins, eta_secs) + +# Calculate speed and return it in string format +def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < const_epsilon: + return 'N/A b' + return format_bytes(float(bytes) / dif) + + +# Title string minimal transformation +def title_string_touch(title): + return title.replace(os.sep, '%') + +# Create the command line options parser and parse command line +cmdl_usage = 'usage: %prog [options] video_url' +cmdl_version = '2008.01.24' +cmdl_parser = optparse.OptionParser(usage=cmdl_usage, version=cmdl_version, conflict_handler='resolve') +cmdl_parser.add_option('-h', '--help', action='help', help='print this help text and exit') +cmdl_parser.add_option('-v', '--version', action='version', help='print program version and exit') +cmdl_parser.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username') +cmdl_parser.add_option('-p', '--password', dest='password', metavar='PASSWORD', help='account password') +cmdl_parser.add_option('-o', '--output', dest='outfile', metavar='FILE', help='output video file name') +cmdl_parser.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode') +cmdl_parser.add_option('-s', '--simulate', action='store_true', dest='simulate', help='do not download video') +cmdl_parser.add_option('-t', '--title', action='store_true', dest='use_title', help='use title in file name') +cmdl_parser.add_option('-l', '--literal', action='store_true', dest='use_literal', help='use literal title in file name') +cmdl_parser.add_option('-n', '--netrc', action='store_true', dest='use_netrc', help='use .netrc authentication data') +cmdl_parser.add_option('-g', '--get-url', action='store_true', dest='get_url', help='print final video 
URL only') +cmdl_parser.add_option('-2', '--title-too', action='store_true', dest='get_title', help='used with -g, print title too') +(cmdl_opts, cmdl_args) = cmdl_parser.parse_args() + +# Set socket timeout +socket.setdefaulttimeout(const_timeout) + +# Get video URL +if len(cmdl_args) != 1: + cmdl_parser.print_help() + sys.exit('\n') +video_url_cmdl = cmdl_args[0] + +# Verify video URL format and convert to "standard" format +video_url_mo = const_video_url_re.match(video_url_cmdl) +if video_url_mo is None: + sys.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.') +video_url_id = video_url_mo.group(2) +video_url = const_video_url_str % video_url_id + +# Check conflicting options +if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url): + sys.stderr.write('Warning: video file name given but will not be used.\n') + +if cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal): + sys.exit('Error: using the video title conflicts with using a given file name.') + +if cmdl_opts.use_title and cmdl_opts.use_literal: + sys.exit('Error: cannot use title and literal title at the same time.') + +if cmdl_opts.quiet and cmdl_opts.get_url: + sys.exit('Error: cannot be quiet and print final URL at the same time.') + +# Incorrect option formatting +if cmdl_opts.username is None and cmdl_opts.password is not None: + sys.exit('Error: password give but username is missing.') + +if cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None): + sys.exit('Error: cannot use netrc and username/password at the same time.') + +if cmdl_opts.get_url is None and cmdl_opts.get_title is not None: + sys.exit('Error: getting title requires getting URL.') + +# Get account information if any +account_username = None +account_password = None + +if cmdl_opts.use_netrc: + try: + info = netrc.netrc().authenticators('youtube') + if info is None: + sys.exit('Error: no authenticators for machine 
youtube.') + account_username = info[0] + account_password = info[2] + except IOError: + sys.exit('Error: unable to read .netrc file.') + except netrc.NetrcParseError: + sys.exit('Error: unable to parse .netrc file.') +else: + account_username = cmdl_opts.username + if account_username is not None: + if cmdl_opts.password is None: + account_password = getpass.getpass('Type YouTube password and press return: ') + else: + account_password = cmdl_opts.password + +# Get output file name +if cmdl_opts.outfile is None: + video_filename = '%s.flv' % video_url_id +else: + video_filename = cmdl_opts.outfile + +# Install cookie and proxy handlers +urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) +urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) + +# Log in and confirm age if needed +if account_username is not None: + url = const_login_url_str % video_url_id + post = const_login_post_str % (video_url_id, account_username, account_password) + download_step(False, 'Logging in', 'unable to log in', url, post) + + url = const_age_url_str % video_url_id + post = const_age_post_str % video_url_id + download_step(False, 'Confirming age', 'unable to confirm age', url, post) + +# Retrieve video webpage +video_webpage = download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url) + +# Extract video title if needed +if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title: + video_title = extract_step('Extracting video title', 'unable to extract video title', const_video_title_re, video_webpage) + +# Extract needed video URL parameters +video_url_t_param = extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re, video_webpage) +video_url_real = const_video_url_real_str % (video_url_id, video_url_t_param) + +# Rebuild filename if needed +if cmdl_opts.use_title or cmdl_opts.use_literal: + if cmdl_opts.use_title: + prefix = 
title_string_norm(video_title) + else: + prefix = title_string_touch(video_title) + video_filename = '%s-%s.flv' % (prefix, video_url_id) + +# Check name +if not video_filename.lower().endswith('.flv'): + sys.stderr.write('Warning: video file name does not end in .flv\n') + +# Retrieve video data +try: + cond_print('Requesting video file... ') + video_data = perform_request(video_url_real) + cond_print('done.\n') + cond_print('Video data found at %s\n' % video_data.geturl()) + + if cmdl_opts.get_title: + print video_title + + if cmdl_opts.get_url: + print video_data.geturl() + + if cmdl_opts.simulate or cmdl_opts.get_url: + sys.exit() + + try: + video_file = open(video_filename, 'wb') + except (IOError, OSError): + sys.exit('Error: unable to open "%s" for writing.' % video_filename) + try: + video_len = long(video_data.info()['Content-length']) + video_len_str = format_bytes(video_len) + except KeyError: + video_len = None + video_len_str = 'N/A' + + byte_counter = 0 + block_size = const_initial_block_size + start_time = time.time() + while True: + if video_len is not None: + percent = float(byte_counter) / float(video_len) * 100.0 + percent_str = '%.1f' % percent + eta_str = calc_eta(start_time, time.time(), video_len, byte_counter) + else: + percent_str = '---.-' + eta_str = '--:--' + counter = format_bytes(byte_counter) + speed_str = calc_speed(start_time, time.time(), byte_counter) + cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str)) + + before = time.time() + video_block = video_data.read(block_size) + after = time.time() + dl_bytes = len(video_block) + if dl_bytes == 0: + break + byte_counter += dl_bytes + video_file.write(video_block) + block_size = new_block_size(before, after, dl_bytes) + + if video_len is not None and byte_counter != video_len: + error_advice_exit('server did not send the expected ammount of data') + + video_file.close() + cond_print('done.\n') + 
cond_print('Video data saved to %s\n' % video_filename) + +except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error): + cond_print('failed.\n') + error_advice_exit('unable to download video data') + +except KeyboardInterrupt: + sys.exit('\n') + +# Finish +sys.exit() diff --git a/youtube-dl.spec b/youtube-dl.spec new file mode 100644 index 0000000..80d0cd9 --- /dev/null +++ b/youtube-dl.spec @@ -0,0 +1,61 @@ +Name: youtube-dl +Version: 2008.01.24 +Release: 1%{?dist} +Summary: Small command-line program to download videos from YouTube +Summary(pl): Tekstowy program do pobierania filmów z youtube.com +Group: Applications/Multimedia +License: MIT +URL: http://www.arrakis.es/~rggi3/youtube-dl/ +Source0: http://www.arrakis.es/~rggi3/youtube-dl/youtube-dl +Source1: http://www.arrakis.es/~rggi3/youtube-dl/index.html +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildArch: noarch +Requires: python >= 2.4 + +%description +Small command-line program to download videos from YouTube. + +%description -l pl +youtube-dl to mały tekstowy program służący do pobierania filmów z +youtube.com. + +%prep +cp %{SOURCE1} . + +%build +#nothing to build + +%install +rm -rf $RPM_BUILD_ROOT +mkdir -p $RPM_BUILD_ROOT%{_bindir} +install -m 755 %{SOURCE0} $RPM_BUILD_ROOT%{_bindir} + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%{_bindir}/%{name} +%doc index.html + +%changelog +* Sat Jan 26 2008 Krzysztof Kurzawski 2008.01.24-1 +- Update to v2008.01.24 +- Add polish summary and description. + +* Wed Jan 02 2008 Krzysztof Kurzawski 2007.10.12-5 +- Correct install. +- Correct documentation. + +* Sat Dec 29 2007 Krzysztof Kurzawski 2007.10.12-4 +- Correct requires. +- Add documentation. + +* Sun Dec 23 2007 Krzysztof Kurzawski 2007.10.12-3 +- Correct version tag. + +* Fri Dec 14 2007 Krzysztof Kurzawski 1-2 +- Update to v2007.10.12, correct license and update summary. 
+ +* Sun Dec 9 2007 Krzysztof Kurzawski 1-1 +- First release From f31769c79df0055a2213abf9473af34c9760db11 Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Wed, 25 Feb 2009 18:13:50 +0000 Subject: [PATCH 002/279] - Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild --- youtube-dl.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 80d0cd9..3b80087 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl Version: 2008.01.24 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Wed Feb 25 2009 Fedora Release Engineering - 2008.01.24-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild + * Sat Jan 26 2008 Krzysztof Kurzawski 2008.01.24-1 - Update to v2008.01.24 - Add polish summary and description. 
From 8df9f09fcba3184bfdd35e492d567de81b810d52 Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Mon, 27 Jul 2009 08:55:10 +0000 Subject: [PATCH 003/279] - Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild --- youtube-dl.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 3b80087..6fda444 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl Version: 2008.01.24 -Release: 2%{?dist} +Release: 3%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Mon Jul 27 2009 Fedora Release Engineering - 2008.01.24-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild + * Wed Feb 25 2009 Fedora Release Engineering - 2008.01.24-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild From e3d407d6b3a7cf140c6b188eb3f54d9c7f5f9016 Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Tue, 29 Sep 2009 07:27:47 +0000 Subject: [PATCH 004/279] Initialize branch F-12 for youtube-dl --- branch | 1 + 1 file changed, 1 insertion(+) create mode 100644 branch diff --git a/branch b/branch new file mode 100644 index 0000000..06de2d2 --- /dev/null +++ b/branch @@ -0,0 +1 @@ +F-12 From df48d1a7572c028005a40455333d1b1ff0b32961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Psota?= Date: Fri, 9 Oct 2009 16:41:25 +0000 Subject: [PATCH 005/279] update to 2009.09.13 --- index.html | 656 +++++++++++++++----- youtube-dl | 1573 ++++++++++++++++++++++++++++++++++++----------- youtube-dl.spec | 21 +- 3 files changed, 1717 insertions(+), 533 deletions(-) diff --git a/index.html b/index.html index 9fd6cf0..25f453b 100644 --- a/index.html +++ b/index.html @@ -1,165 +1,501 @@ - - + + - - youtube-dl: Download videos from YouTube.com - + + rg3 / youtube-dl / wiki / Home — bitbucket.org 
+ + + + + + + + + + + + + + + + + + + + + - -

youtube-dl: Download videos from YouTube.com

- -

What is it?

- -

youtube-dl is a small command-line program to download videos -from YouTube.com. It requires the Python -interpreter, version 2.4 or later, and it's not platform specific. -It should work in your Unix box, in Windows or in Mac OS X. The latest version -is 2008.01.24. It's licensed under the MIT License, which -means you can modify it, redistribute it or use it however you like -complying with a few simple conditions.

- -

I'll try to keep it updated if YouTube.com changes the way you access -their videos. After all, it's a simple and short program. However, I can't -guarantee anything. If you detect it stops working, check for new versions -and/or inform me about the problem, indicating the program version you -are using. If the program stops working and I can't solve the problem but -you have a solution, I'd like to know it. If that happens and you feel you -can maintain the program yourself, tell me. My contact information is -at freshmeat.net.

- -

Thanks for all the feedback received so far. I'm glad people find my -program useful.

- -

Related projects: -metacafe-dl -pornotube-dl -

- -

Usage instructions

- -

In Windows, once you have installed the Python interpreter, save the -program with the .py extension and put it somewhere in the PATH. -Try to follow the -guide to -install youtube-dl under Windows XP.

- -

In Unix, download it, give it execution permission and copy it to one -of the PATH directories (typically, /usr/local/bin).

- -

After that, you should be able to call it from the command line as -youtube-dl or youtube-dl.py. I will use youtube-dl -in the following examples. Usage instructions are easy. Use youtube-dl -followed by a video URL or identifier. Example: youtube-dl -"http://www.youtube.com/watch?v=foobar". The video will be saved -to the file foobar.flv in that example. As YouTube.com -videos are in Flash Video format, their extension should be flv. -In Linux and other unices, video players using a recent version of -ffmpeg can play them. That includes MPlayer, VLC, etc. Those two -work under Windows and other platforms, but you could also get a -specific FLV player of your taste.

- -

If you try to run the program and you receive an error message containing the -keyword SyntaxError near the end, it means your Python interpreter -is too old.

- -

More usage tips

- -
    - -
  • You can change the file name of the video using the -o option, like in -youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar".
  • - -
  • Some videos require an account to be downloaded, mostly because they're -flagged as mature content. You can pass the program a username and password -for a YouTube.com account with the -u and -p options, like youtube-dl --u myusername -p mypassword "http://www.youtube.com/watch?v=foobar".
  • - -
  • The account data can also be read from the user .netrc file by indicating -the -n or --netrc option. The machine name is youtube in that -case.
  • - -
  • The simulate mode (activated with -s or --simulate) can be used -to just get the real video URL and use it with a download manager if you -prefer that option.
  • - -
  • The quiet mode (activated with -q or --quiet) can be used to -supress all output messages. This allows, in systems featuring /dev/stdout -and other similar special files, outputting the video data to standard output -in order to pipe it to another program without interferences.
  • - -
  • The program can be told to simply print the final video URL to standard -output using the -g or --get-url option.
  • - -
  • Combined with the above option, the -2 or --title-too option tells the -program to print the video title too.
  • - -
  • The default filename is video_id.flv. But you can also use the -video title in the filename with the -t or --title option.
  • - -
  • youtube-dl honors the http_proxy environment variable -if you want to use a proxy. Set it to something like -http://proxy.example.com:8080, and do not leave the http:// -prefix out.
  • - -
  • You can get the program version by calling it as youtube-dl --v or youtube-dl --version.
  • - -
  • For usage instructions, use youtube-dl -h or youtube-dl ---help.
  • - -
  • You can cancel the program at any time pressing Ctrl+C. It may print -some error lines saying something about KeyboardInterrupt. -That's ok.
  • - -
- -

Download it

- -

Note that if you directly click on these hyperlinks, your web browser will -most likely display the program contents. It's usually better to -right-click on it and choose the appropriate option, normally called Save -Target As or Save Link As, depending on the web browser you -are using.

- -

2008.01.24

-
    -
  • MD5: 48a7df743398bbfdf2710962000886cb
  • -
  • SHA1: 05ab118a157dbeedf7430d8445963c0340188c04
  • -
  • SHA256: 6e98f3abfa79a038e578d45441bb6675ac6d4880bae4b801d5125e364e60d5c5
  • -
+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + + +
+ +
+ + +
+
+
+ + + + +

+ rg3 / + youtube-dl + (http://bitbucket.org/rg3/youtube-dl/wiki/) + +

+ + + + + +

youtube-dl is a small command-line program for downloading videos from YouTube.com.

+ +
Clone this repository (size: 199.5 KB): HTTPS / SSH
+
$ hg clone http://bitbucket.org/rg3/youtube-dl/
+ +
+ +
+ +
+ + + + + + + +
+ +
+
+ + + +
+



+

youtube-dl: Download videos from YouTube.com

+

(and more...)

+

What is it?

+

youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform-specific. It should work on your Unix box, on Windows or on Mac OS X. The latest version is 2009.09.13. It's released into the public domain, which means you can modify it, redistribute it or use it however you like.

+

I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

+

Thanks for all the feedback received so far. I'm glad people find my program useful.

+

Usage instructions

+

In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

+

In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

+

After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

+

If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

+

More usage tips

+
  • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. +
  • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". +
  • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. +
  • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. +
  • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. +
  • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. +
  • In a similar line, the -e or --get-title option tells the program to print the video title. +
  • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. +
  • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. +
  • The -b or --best-quality option can be used to download the highest available quality version of any given video. +
  • The -m or --mobile-version option is an alias for -f 17. +
  • The -d or --high-def option is an alias for -f 22. +
  • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. +
  • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. +
  • The program can be told not to overwrite existing files using the -w or --no-overwrites option. +
  • It can be told to attempt to continue interrupted downloads with the -c or --continue option. +
  • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. +
  • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". +
  • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. +
  • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. +
  • For usage instructions, use youtube-dl -h or youtube-dl --help. +
  • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. +
+

Download it

+

Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on the link and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

+

2009.09.13

+
  • MD5: db4e6acf6876f5df4896724be0084980 +
  • SHA1: c76eca4489d625b76955d2eda76be19960bd9f3b +
  • SHA256: d6ca29457644d3fca8915f50470d50a2599100365bfdc94784868d7884a98afe +
+

Output template

+

The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

+
  • id: The sequence will be replaced by the video identifier. +
  • url: The sequence will be replaced by the video URL. +
  • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. +
  • title: The sequence will be replaced by the literal video title. +
  • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. +
  • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). +
  • epoch: The sequence will be replaced by the Unix epoch when creating the file. +
+

As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

+

Authors

+
  • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. +
  • Danny Colligan: YouTube search InfoExtractor, ideas and patches. +
  • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. +
+

Copyright © 2006-2009 Ricardo Garcia Gonzalez

+ + + +
+ - +
+
+ +
+ + + diff --git a/youtube-dl b/youtube-dl index 1aa6123..5afff4e 100644 --- a/youtube-dl +++ b/youtube-dl @@ -1,387 +1,1228 @@ #!/usr/bin/env python -# -# Copyright (c) 2006-2008 Ricardo Garcia Gonzalez -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Except as contained in this notice, the name(s) of the above copyright -# holders shall not be used in advertising or otherwise to promote the -# sale, use or other dealings in this Software without prior written -# authorization. 
-# -import getpass +# -*- coding: utf-8 -*- +# Author: Ricardo Garcia Gonzalez +# Author: Danny Colligan +# License: Public domain code +import htmlentitydefs import httplib +import locale import math import netrc -import optparse import os +import os.path import re import socket import string import sys import time +import urllib import urllib2 -# Global constants -const_1k = 1024 -const_initial_block_size = 10 * const_1k -const_epsilon = 0.0001 -const_timeout = 120 - -const_video_url_str = 'http://www.youtube.com/watch?v=%s' -const_video_url_re = re.compile(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$') -const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' -const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' -const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' -const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' -const_url_t_param_re = re.compile(r', "t": "([^"]+)"') -const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s' -const_video_title_re = re.compile(r'YouTube - ([^<]*)', re.M | re.I) - -# Print error message, followed by standard advice information, and then exit -def error_advice_exit(error_text): - sys.stderr.write('Error: %s.\n' % error_text) - sys.stderr.write('Try again several times. 
It may be a temporary problem.\n') - sys.stderr.write('Other typical problems:\n\n') - sys.stderr.write('* Video no longer exists.\n') - sys.stderr.write('* Video requires age confirmation but you did not provide an account.\n') - sys.stderr.write('* You provided the account data, but it is not valid.\n') - sys.stderr.write('* The connection was cut suddenly for some reason.\n') - sys.stderr.write('* YouTube changed their system, and the program no longer works.\n') - sys.stderr.write('\nTry to confirm you are able to view the video using a web browser.\n') - sys.stderr.write('Use the same video URL and account information, if needed, with this program.\n') - sys.stderr.write('When using a proxy, make sure http_proxy has http://host:port format.\n') - sys.stderr.write('Try again several times and contact me if the problem persists.\n') - sys.exit('\n') - -# Wrapper to create custom requests with typical headers -def request_create(url, data=None): - retval = urllib2.Request(url) - if data is not None: - retval.add_data(data) - # Try to mimic Firefox, at least a little bit - retval.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') - retval.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') - retval.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') - retval.add_header('Accept-Language', 'en-us,en;q=0.5') - return retval - -# Perform a request, process headers and return response -def perform_request(url, data=None): - request = request_create(url, data) - response = urllib2.urlopen(request) - return response - -# Conditional print -def cond_print(str): - global cmdl_opts - if not (cmdl_opts.quiet or cmdl_opts.get_url): - sys.stdout.write(str) - sys.stdout.flush() - -# Title string normalization -def title_string_norm(title): - title = ''.join((x in string.ascii_letters or x in string.digits) and x or ' ' for x in 
title) - title = '_'.join(title.split()) - title = title.lower() - return title - -# Generic download step -def download_step(return_data_flag, step_title, step_error, url, post_data=None): - try: - cond_print('%s... ' % step_title) - data = perform_request(url, post_data).read() - cond_print('done.\n') - if return_data_flag: - return data - return None +std_headers = { + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept-Language': 'en-us,en;q=0.5', +} - except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error): - cond_print('failed.\n') - error_advice_exit(step_error) +simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') - except KeyboardInterrupt: - sys.exit('\n') +def preferredencoding(): + """Get preferred encoding. -# Generic extract step -def extract_step(step_title, step_error, regexp, data): + Returns the best encoding scheme for the system, based on + locale.getpreferredencoding() and some further tweaks. + """ try: - cond_print('%s... ' % step_title) - match = regexp.search(data) - - if match is None: - cond_print('failed.\n') - error_advice_exit(step_error) - - extracted_data = match.group(1) - cond_print('done.\n') - return extracted_data + pref = locale.getpreferredencoding() + # Mac OSX systems have this problem sometimes + if pref == '': + return 'UTF-8' + return pref + except: + sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n') + return 'UTF-8' + +class DownloadError(Exception): + """Download Error exception. 
- except KeyboardInterrupt: - sys.exit('\n') - -# Calculate new block size based on previous block size -def new_block_size(before, after, bytes): - new_min = max(bytes / 2.0, 1.0) - new_max = max(bytes * 2.0, 1.0) - dif = after - before - if dif < const_epsilon: - return int(new_max) - rate = bytes / dif - if rate > new_max: - return int(new_max) - if rate < new_min: - return int(new_min) - return int(rate) - -# Get optimum 1k exponent to represent a number of bytes -def optimum_k_exp(num_bytes): - global const_1k - if num_bytes == 0: - return 0 - return long(math.log(num_bytes, const_1k)) - -# Get optimum representation of number of bytes -def format_bytes(num_bytes): - global const_1k - try: - exp = optimum_k_exp(num_bytes) - suffix = 'bkMGTPEZY'[exp] - if exp == 0: - return '%s%s' % (num_bytes, suffix) - converted = float(num_bytes) / float(const_1k**exp) + This exception may be thrown by FileDownloader objects if they are not + configured to continue on errors. They will contain the appropriate + error message. + """ + pass + +class SameFileError(Exception): + """Same File exception. + + This exception will be thrown by FileDownloader objects if they detect + multiple files would have to be downloaded to the same file on disk. + """ + pass + +class PostProcessingError(Exception): + """Post Processing exception. + + This exception may be raised by PostProcessor's .run() method to + indicate an error in the postprocessing task. + """ + pass + +class UnavailableFormatError(Exception): + """Unavailable Format exception. + + This exception will be thrown when a video is requested + in a format that is not available for that video. + """ + pass + +class ContentTooShortError(Exception): + """Content Too Short exception. + + This exception may be raised by FileDownloader objects when a file they + download is too small for what the server announced first, indicating + the connection was probably interrupted. 
+ """ + # Both in bytes + downloaded = None + expected = None + + def __init__(self, downloaded, expected): + self.downloaded = downloaded + self.expected = expected + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk if the user has requested + it, among some other tasks. In most cases there should be one per + program. As, given a video URL, the downloader doesn't know how to + extract all the needed information, task that InfoExtractors do, it + has to pass the URL to one of them. + + For this, file downloader objects have a method that allows + InfoExtractors to be registered in a given order. When it is passed + a URL, the file downloader handles it to the first InfoExtractor it + finds that reports being able to handle it. The InfoExtractor extracts + all the information about the video or videos the URL refers to, and + asks the FileDownloader to process the video information, possibly + downloading the video. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. These options are available through the params + attribute for the InfoExtractors to use. The FileDownloader also + registers itself as the downloader in charge for the InfoExtractors + that are added to it, so this is a "mutual registration". + + Available options: + + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + simulate: Do not download the video files. + format: Video format code. + outtmpl: Template for output names. + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. 
+ nooverwrites: Prevent overwriting files. + continuedl: Try to continue downloads if possible. + """ + + params = None + _ies = [] + _pps = [] + _download_retcode = None + + def __init__(self, params): + """Create a FileDownloader object with the given options.""" + self._ies = [] + self._pps = [] + self._download_retcode = 0 + self.params = params + + @staticmethod + def pmkdir(filename): + """Create directory components in filename. Similar to Unix "mkdir -p".""" + components = filename.split(os.sep) + aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] + aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator + for dir in aggregate: + if not os.path.exists(dir): + os.mkdir(dir) + + @staticmethod + def format_bytes(bytes): + if bytes is None: + return 'N/A' + if type(bytes) is str: + bytes = float(bytes) + if bytes == 0.0: + exponent = 0 + else: + exponent = long(math.log(bytes, 1024.0)) + suffix = 'bkMGTPEZY'[exponent] + converted = float(bytes) / float(1024**exponent) return '%.2f%s' % (converted, suffix) - except IndexError: - sys.exit('Error: internal error formatting number of bytes.') - -# Calculate ETA and return it in string format as MM:SS -def calc_eta(start, now, total, current): - dif = now - start - if current == 0 or dif < const_epsilon: - return '--:--' - rate = float(current) / dif - eta = long((total - current) / rate) - (eta_mins, eta_secs) = divmod(eta, 60) - if eta_mins > 99: - return '--:--' - return '%02d:%02d' % (eta_mins, eta_secs) - -# Calculate speed and return it in string format -def calc_speed(start, now, bytes): - dif = now - start - if bytes == 0 or dif < const_epsilon: - return 'N/A b' - return format_bytes(float(bytes) / dif) - - -# Title string minimal transformation -def title_string_touch(title): - return title.replace(os.sep, '%') - -# Create the command line options parser and parse command line -cmdl_usage = 'usage: %prog [options] video_url' -cmdl_version = 
'2008.01.24' -cmdl_parser = optparse.OptionParser(usage=cmdl_usage, version=cmdl_version, conflict_handler='resolve') -cmdl_parser.add_option('-h', '--help', action='help', help='print this help text and exit') -cmdl_parser.add_option('-v', '--version', action='version', help='print program version and exit') -cmdl_parser.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username') -cmdl_parser.add_option('-p', '--password', dest='password', metavar='PASSWORD', help='account password') -cmdl_parser.add_option('-o', '--output', dest='outfile', metavar='FILE', help='output video file name') -cmdl_parser.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode') -cmdl_parser.add_option('-s', '--simulate', action='store_true', dest='simulate', help='do not download video') -cmdl_parser.add_option('-t', '--title', action='store_true', dest='use_title', help='use title in file name') -cmdl_parser.add_option('-l', '--literal', action='store_true', dest='use_literal', help='use literal title in file name') -cmdl_parser.add_option('-n', '--netrc', action='store_true', dest='use_netrc', help='use .netrc authentication data') -cmdl_parser.add_option('-g', '--get-url', action='store_true', dest='get_url', help='print final video URL only') -cmdl_parser.add_option('-2', '--title-too', action='store_true', dest='get_title', help='used with -g, print title too') -(cmdl_opts, cmdl_args) = cmdl_parser.parse_args() - -# Set socket timeout -socket.setdefaulttimeout(const_timeout) - -# Get video URL -if len(cmdl_args) != 1: - cmdl_parser.print_help() - sys.exit('\n') -video_url_cmdl = cmdl_args[0] - -# Verify video URL format and convert to "standard" format -video_url_mo = const_video_url_re.match(video_url_cmdl) -if video_url_mo is None: - sys.exit('Error: URL does not seem to be a youtube video URL. 
If it is, report a bug.') -video_url_id = video_url_mo.group(2) -video_url = const_video_url_str % video_url_id - -# Check conflicting options -if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url): - sys.stderr.write('Warning: video file name given but will not be used.\n') - -if cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal): - sys.exit('Error: using the video title conflicts with using a given file name.') - -if cmdl_opts.use_title and cmdl_opts.use_literal: - sys.exit('Error: cannot use title and literal title at the same time.') - -if cmdl_opts.quiet and cmdl_opts.get_url: - sys.exit('Error: cannot be quiet and print final URL at the same time.') - -# Incorrect option formatting -if cmdl_opts.username is None and cmdl_opts.password is not None: - sys.exit('Error: password give but username is missing.') - -if cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None): - sys.exit('Error: cannot use netrc and username/password at the same time.') - -if cmdl_opts.get_url is None and cmdl_opts.get_title is not None: - sys.exit('Error: getting title requires getting URL.') - -# Get account information if any -account_username = None -account_password = None - -if cmdl_opts.use_netrc: - try: - info = netrc.netrc().authenticators('youtube') - if info is None: - sys.exit('Error: no authenticators for machine youtube.') - account_username = info[0] - account_password = info[2] - except IOError: - sys.exit('Error: unable to read .netrc file.') - except netrc.NetrcParseError: - sys.exit('Error: unable to parse .netrc file.') -else: - account_username = cmdl_opts.username - if account_username is not None: - if cmdl_opts.password is None: - account_password = getpass.getpass('Type YouTube password and press return: ') + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return '---.-%' + return '%6s' % ('%3.1f%%' % (float(byte_counter) / 
float(data_len) * 100.0)) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return '--:--' + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return '--:--' + rate = float(current) / dif + eta = long((float(total) - float(current)) / rate) + (eta_mins, eta_secs) = divmod(eta, 60) + if eta_mins > 99: + return '--:--' + return '%02d:%02d' % (eta_mins, eta_secs) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return '%10s' % '---b/s' + return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return long(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return long(new_max) + if rate < new_min: + return long(new_min) + return long(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into a long integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return long(round(number * multiplier)) + + @staticmethod + def verify_url(url): + """Verify a URL is valid and data could be downloaded. 
Return real data URL.""" + request = urllib2.Request(url, None, std_headers) + data = urllib2.urlopen(request) + data.read(1) + url = data.geturl() + data.close() + return url + + def add_info_extractor(self, ie): + """Add an InfoExtractor object to the end of the list.""" + self._ies.append(ie) + ie.set_downloader(self) + + def add_post_processor(self, pp): + """Add a PostProcessor object to the end of the chain.""" + self._pps.append(pp) + pp.set_downloader(self) + + def to_stdout(self, message, skip_eol=False): + """Print message to stdout if not in quiet mode.""" + if not self.params.get('quiet', False): + print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), + sys.stdout.flush() + + def to_stderr(self, message): + """Print message to stderr.""" + print >>sys.stderr, message.encode(preferredencoding()) + + def fixed_template(self): + """Checks if the output template is fixed.""" + return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) + + def trouble(self, message=None): + """Determine action to take when a download problem appears. + + Depending on if the downloader has been configured to ignore + download errors or not, this method may throw an exception or + not when errors are found, after printing the message. 
+ """ + if message is not None: + self.to_stderr(message) + if not self.params.get('ignoreerrors', False): + raise DownloadError(message) + self._download_retcode = 1 + + def slow_down(self, start_time, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit', None) + if rate_limit is None or byte_counter == 0: + return + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + + def report_destination(self, filename): + """Report destination filename.""" + self.to_stdout(u'[download] Destination: %s' % filename) + + def report_progress(self, percent_str, data_len_str, speed_str, eta_str): + """Report download progress.""" + self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + + def report_resuming_byte(self, resume_len): + """Report attemtp to resume at given byte.""" + self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + self.to_stdout(u'[download] %s has already been downloaded' % file_name) + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_stdout(u'[download] Unable to resume') + + def report_finish(self): + """Report download finished.""" + self.to_stdout(u'') + + def process_info(self, info_dict): + """Process a single dictionary returned by an InfoExtractor.""" + # Do nothing else if in simulate mode + if self.params.get('simulate', False): + try: + info_dict['url'] = self.verify_url(info_dict['url']) + except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: + raise UnavailableFormatError + + # Forced printings + if self.params.get('forcetitle', False): + 
print info_dict['title'].encode(preferredencoding()) + if self.params.get('forceurl', False): + print info_dict['url'].encode(preferredencoding()) + + return + + try: + template_dict = dict(info_dict) + template_dict['epoch'] = unicode(long(time.time())) + filename = self.params['outtmpl'] % template_dict + except (ValueError, KeyError), err: + self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + if self.params['nooverwrites'] and os.path.exists(filename): + self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) + return + + try: + self.pmkdir(filename) + except (OSError, IOError), err: + self.trouble('ERROR: unable to create directories: %s' % str(err)) + return + + try: + success = self._do_download(filename, info_dict['url']) + except (OSError, IOError), err: + raise UnavailableFormatError + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.trouble('ERROR: unable to download video data: %s' % str(err)) + return + except (ContentTooShortError, ), err: + self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return + + if success: + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + self.trouble('ERROR: postprocessing: %s' % str(err)) + return + + def download(self, url_list): + """Download a given list of URLs.""" + if len(url_list) > 1 and self.fixed_template(): + raise SameFileError(self.params['outtmpl']) + + for url in url_list: + suitable_found = False + for ie in self._ies: + # Go to next InfoExtractor if not suitable + if not ie.suitable(url): + continue + + # Suitable InfoExtractor found + suitable_found = True + + # Extract information from URL and process it + ie.extract(url) + + # Suitable InfoExtractor had been found; go to next URL + break + + if not suitable_found: + self.trouble('ERROR: no suitable InfoExtractor: %s' % url) + + return self._download_retcode + + def post_process(self, filename, 
ie_info): + """Run the postprocessing chain on the given file.""" + info = dict(ie_info) + info['filepath'] = filename + for pp in self._pps: + info = pp.run(info) + if info is None: + break + + def _do_download(self, filename, url): + stream = None + open_mode = 'ab' + + basic_request = urllib2.Request(url, None, std_headers) + request = urllib2.Request(url, None, std_headers) + + # Attempt to resume download with "continuedl" option + if os.path.isfile(filename): + resume_len = os.path.getsize(filename) else: - account_password = cmdl_opts.password - -# Get output file name -if cmdl_opts.outfile is None: - video_filename = '%s.flv' % video_url_id -else: - video_filename = cmdl_opts.outfile - -# Install cookie and proxy handlers -urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) -urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) - -# Log in and confirm age if needed -if account_username is not None: - url = const_login_url_str % video_url_id - post = const_login_post_str % (video_url_id, account_username, account_password) - download_step(False, 'Logging in', 'unable to log in', url, post) - - url = const_age_url_str % video_url_id - post = const_age_post_str % video_url_id - download_step(False, 'Confirming age', 'unable to confirm age', url, post) - -# Retrieve video webpage -video_webpage = download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url) - -# Extract video title if needed -if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title: - video_title = extract_step('Extracting video title', 'unable to extract video title', const_video_title_re, video_webpage) - -# Extract needed video URL parameters -video_url_t_param = extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re, video_webpage) -video_url_real = const_video_url_real_str % (video_url_id, video_url_t_param) - -# Rebuild filename if needed -if 
cmdl_opts.use_title or cmdl_opts.use_literal: - if cmdl_opts.use_title: - prefix = title_string_norm(video_title) - else: - prefix = title_string_touch(video_title) - video_filename = '%s-%s.flv' % (prefix, video_url_id) - -# Check name -if not video_filename.lower().endswith('.flv'): - sys.stderr.write('Warning: video file name does not end in .flv\n') - -# Retrieve video data -try: - cond_print('Requesting video file... ') - video_data = perform_request(video_url_real) - cond_print('done.\n') - cond_print('Video data found at %s\n' % video_data.geturl()) - - if cmdl_opts.get_title: - print video_title - - if cmdl_opts.get_url: - print video_data.geturl() - - if cmdl_opts.simulate or cmdl_opts.get_url: - sys.exit() + resume_len = 0 + if self.params['continuedl'] and resume_len != 0: + self.report_resuming_byte(resume_len) + request.add_header('Range','bytes=%d-' % resume_len) - try: - video_file = open(video_filename, 'wb') - except (IOError, OSError): - sys.exit('Error: unable to open "%s" for writing.' 
% video_filename) - try: - video_len = long(video_data.info()['Content-length']) - video_len_str = format_bytes(video_len) - except KeyError: - video_len = None - video_len_str = 'N/A' - - byte_counter = 0 - block_size = const_initial_block_size - start_time = time.time() - while True: - if video_len is not None: - percent = float(byte_counter) / float(video_len) * 100.0 - percent_str = '%.1f' % percent - eta_str = calc_eta(start_time, time.time(), video_len, byte_counter) + # Establish connection + try: + data = urllib2.urlopen(request) + except (urllib2.HTTPError, ), err: + if err.code != 416: # 416 is 'Requested range not satisfiable' + raise + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + if content_length is not None and long(content_length) == resume_len: + self.report_file_already_downloaded(filename) + return True + else: + self.report_unable_to_resume() + open_mode = 'wb' + + data_len = data.info().get('Content-length', None) + data_len_str = self.format_bytes(data_len) + byte_counter = 0 + block_size = 1024 + start = time.time() + while True: + # Download and write + before = time.time() + data_block = data.read(block_size) + after = time.time() + data_block_len = len(data_block) + if data_block_len == 0: + break + byte_counter += data_block_len + + # Open file just in time + if stream is None: + try: + stream = open(filename, open_mode) + self.report_destination(filename) + except (OSError, IOError), err: + self.trouble('ERROR: unable to open for writing: %s' % str(err)) + return False + stream.write(data_block) + block_size = self.best_block_size(after - before, data_block_len) + + # Progress message + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) + speed_str = self.calc_speed(start, time.time(), byte_counter) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) + + # Apply rate limit + self.slow_down(start, 
byte_counter) + + self.report_finish() + if data_len is not None and str(byte_counter) != data_len: + raise ContentTooShortError(byte_counter, long(data_len)) + return True + +class InfoExtractor(object): + """Information Extractor class. + + Information extractors are the classes that, given a URL, extract + information from the video (or videos) the URL refers to. This + information includes the real video URL, the video title and simplified + title, author and others. The information is stored in a dictionary + which is then passed to the FileDownloader. The FileDownloader + processes this information possibly downloading the video to the file + system, among other possible outcomes. The dictionaries must include + the following fields: + + id: Video identifier. + url: Final video URL. + uploader: Nickname of the video uploader. + title: Literal title. + stitle: Simplified title. + ext: Video filename extension. + + Subclasses of this one should re-define the _real_initialize() and + _real_extract() methods, as well as the suitable() static method. + Probably, they should also be instantiated and added to the main + downloader. + """ + + _ready = False + _downloader = None + + def __init__(self, downloader=None): + """Constructor. Receives an optional downloader.""" + self._ready = False + self.set_downloader(downloader) + + @staticmethod + def suitable(url): + """Receives a URL and returns True if suitable for this IE.""" + return False + + def initialize(self): + """Initializes an instance (authentication, etc).""" + if not self._ready: + self._real_initialize() + self._ready = True + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + self.initialize() + return self._real_extract(url) + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def _real_initialize(self): + """Real initialization process. 
Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. Redefine in subclasses.""" + pass + +class YoutubeIE(InfoExtractor): + """Information extractor for youtube.com.""" + + _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' + _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' + _NETRC_MACHINE = 'youtube' + _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag + _video_extensions = { + '13': '3gp', + '17': 'mp4', + '18': 'mp4', + '22': 'mp4', + } + + @staticmethod + def suitable(url): + return (re.match(YoutubeIE._VALID_URL, url) is not None) + + @staticmethod + def htmlentity_transform(matchobj): + """Transforms an HTML entity to a Unicode character.""" + entity = matchobj.group(1) + + # Known non-numeric HTML entity + if entity in htmlentitydefs.name2codepoint: + return unichr(htmlentitydefs.name2codepoint[entity]) + + # Unicode character + mobj = re.match(ur'(?u)#(x?\d+)', entity) + if mobj is not None: + numstr = mobj.group(1) + if numstr.startswith(u'x'): + base = 16 + numstr = u'0%s' % numstr + else: + base = 10 + return unichr(long(numstr, base)) + + # Unknown entity in name, return its literal representation + return (u'&%s;' % entity) + + def report_lang(self): + """Report attempt to set language.""" + self._downloader.to_stdout(u'[youtube] Setting language') + + def report_login(self): + """Report attempt to log in.""" + self._downloader.to_stdout(u'[youtube] Logging in') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self._downloader.to_stdout(u'[youtube] Confirming age') + + def report_video_info_webpage_download(self, video_id): + """Report attempt to download video info 
webpage.""" + self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) + + def report_information_extraction(self, video_id): + """Report attempt to extract video information.""" + self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) + + def report_unavailable_format(self, video_id, format): + """Report extracted video URL.""" + self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) + + def _real_initialize(self): + if self._downloader is None: + return + + username = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError), err: + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + return + + # Set language + request = urllib2.Request(self._LANG_URL, None, std_headers) + try: + self.report_lang() + urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + return + + # No authentication to be performed + if username is None: + return + + # Log in + login_form = { + 'current_form': 'loginForm', + 'next': '/', + 'action_login': 'Log In', + 'username': username, + 'password': password, + } + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) + try: + self.report_login() + login_results = urllib2.urlopen(request).read() + if re.search(r'(?i)]* name="loginForm"', 
login_results) is not None: + self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') + return + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + return + + # Confirm age + age_form = { + 'next_url': '/', + 'action_confirm': 'Confirm', + } + request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) + try: + self.report_age_confirmation() + age_results = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract video id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group(2) + + # Downloader parameters + best_quality = False + format_param = None + quality_index = 0 + if self._downloader is not None: + params = self._downloader.params + format_param = params.get('format', None) + if format_param == '0': + format_param = self._available_formats[quality_index] + best_quality = True + + while True: + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + + # Get video info + video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id + request = urllib2.Request(video_info_url, None, std_headers) + try: + self.report_video_info_webpage_download(video_id) + video_info_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + self.report_information_extraction(video_id) + + # "t" param + mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + # Attempt to see if YouTube has 
issued an error message + mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') + stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') + stream.write(video_info_webpage) + stream.close() + else: + reason = urllib.unquote_plus(mobj.group(1)) + self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) + return + token = urllib.unquote(mobj.group(1)) + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) + if format_param is not None: + video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + + # uploader + mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = urllib.unquote(mobj.group(1)) + + # title + mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote(mobj.group(1)) + video_title = video_title.decode('utf-8') + video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) + video_title = video_title.replace(os.sep, u'%') + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_real_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + }) + + return + + except UnavailableFormatError, err: + if best_quality: + if quality_index == len(self._available_formats) - 1: + # I don't ever expect this to happen + self._downloader.trouble(u'ERROR: 
no known formats available for video') + return + else: + self.report_unavailable_format(video_id, format_param) + quality_index += 1 + format_param = self._available_formats[quality_index] + continue + else: + self._downloader.trouble('ERROR: format not available for video') + return + + +class MetacafeIE(InfoExtractor): + """Information Extractor for metacafe.com.""" + + _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' + _DISCLAIMER = 'http://www.metacafe.com/family_filter/' + _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(MetacafeIE._VALID_URL, url) is not None) + + def report_disclaimer(self): + """Report disclaimer retrieval.""" + self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self._downloader.to_stdout(u'[metacafe] Confirming age') + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id) + + def _real_initialize(self): + # Retrieve disclaimer + request = urllib2.Request(self._DISCLAIMER, None, std_headers) + try: + self.report_disclaimer() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + return + + # Confirm age + disclaimer_form = { + 'filters': '0', + 'submit': "Continue - I'm over 18", + } + request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), 
std_headers) + try: + self.report_age_confirmation() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + # Check if video comes from YouTube + mobj2 = re.match(r'^yt-(.*)$', video_id) + if mobj2 is not None: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) + return + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + #if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract gdaKey') + # return + #gdaKey = mobj.group(1) + # + #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) + + video_url = mediaURL + + mobj = re.search(r'(?im)(.*) - Video', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + + mobj = re.search(r'(?ms)
  • .*?Submitter:.*?(.*?)<', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class YoutubeSearchIE(InfoExtractor): + """Information Extractor for YouTube search queries.""" + _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' + _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' + _youtube_ie = None + _max_youtube_results = 1000 + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + return + + prefix, query = query.split(':') + prefix = prefix[8:] + if prefix == '': + self._download_n_results(query, 1) + return + elif prefix == 'all': + self._download_n_results(query, self._max_youtube_results) + return else: - percent_str = '---.-' - eta_str = '--:--' - counter = format_bytes(byte_counter) - speed_str = calc_speed(start_time, time.time(), byte_counter) 
- cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str)) - - before = time.time() - video_block = video_data.read(block_size) - after = time.time() - dl_bytes = len(video_block) - if dl_bytes == 0: - break - byte_counter += dl_bytes - video_file.write(video_block) - block_size = new_block_size(before, after, dl_bytes) - - if video_len is not None and byte_counter != video_len: - error_advice_exit('server did not send the expected ammount of data') - - video_file.close() - cond_print('done.\n') - cond_print('Video data saved to %s\n' % video_filename) - -except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error): - cond_print('failed.\n') - error_advice_exit('unable to download video data') - -except KeyboardInterrupt: - sys.exit('\n') - -# Finish -sys.exit() + try: + n = long(prefix) + if n <= 0: + self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return + elif n > self._max_youtube_results: + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + n = self._max_youtube_results + self._download_n_results(query, n) + return + except ValueError: # parsing prefix as integer fails + self._download_n_results(query, 1) + return + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, 
page): + video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + + if re.search(self._MORE_PAGES_INDICATOR, page) is None: + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + + pagenum = pagenum + 1 + +class YoutubePlaylistIE(InfoExtractor): + """Information Extractor for YouTube playlists.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' + _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' + _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) + + def report_download_page(self, playlist_id, pagenum): + """Report attempt to download playlist page with given number.""" + self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract playlist id + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid url: %s' % url) + return + + # Download playlist pages + playlist_id = mobj.group(1) + video_ids = [] + pagenum = 1 + + while True: + self.report_download_page(playlist_id, pagenum) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + 
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) + + if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: + break + pagenum = pagenum + 1 + + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + +class PostProcessor(object): + """Post Processor class. + + PostProcessor objects can be added to downloaders with their + add_post_processor() method. When the downloader has finished a + successful download, it will take its internal chain of PostProcessors + and start calling the run() method on each one of them, first with + an initial argument and then with the returned value of the previous + PostProcessor. + + The chain will be stopped if one of them ever returns None or the end + of the chain is reached. + + PostProcessor objects follow a "mutual registration" process similar + to InfoExtractor objects. + """ + + _downloader = None + + def __init__(self, downloader=None): + self._downloader = downloader + + def set_downloader(self, downloader): + """Sets the downloader for this PP.""" + self._downloader = downloader + + def run(self, information): + """Run the PostProcessor. + + The "information" argument is a dictionary like the ones + composed by InfoExtractors. The only difference is that this + one has an extra field called "filepath" that points to the + downloaded file. + + When this method returns None, the postprocessing chain is + stopped. However, this method may return an information + dictionary that will be passed to the next postprocessing + object in the chain. It can be the one it received after + changing some fields. 
+ + In addition, this method may raise a PostProcessingError + exception that will be taken into account by the downloader + it was called from. + """ + return information # by default, do nothing + +### MAIN PROGRAM ### +if __name__ == '__main__': + try: + # Modules needed only when running the main program + import getpass + import optparse + + # General configuration + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) + urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + # Parse command line + parser = optparse.OptionParser( + usage='Usage: %prog [options] url...', + version='2009.09.13', + conflict_handler='resolve', + ) + + parser.add_option('-h', '--help', + action='help', help='print this help text and exit') + parser.add_option('-v', '--version', + action='version', help='print program version and exit') + parser.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + parser.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='L', help='download rate limit (e.g. 
50k or 44.6m)') + + authentication = optparse.OptionGroup(parser, 'Authentication Options') + authentication.add_option('-u', '--username', + dest='username', metavar='UN', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PW', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + parser.add_option_group(authentication) + + video_format = optparse.OptionGroup(parser, 'Video Format Options') + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FMT', help='video format code') + video_format.add_option('-b', '--best-quality', + action='store_const', dest='format', help='download the best quality video possible', const='0') + video_format.add_option('-m', '--mobile-version', + action='store_const', dest='format', help='alias for -f 17', const='17') + video_format.add_option('-d', '--high-def', + action='store_const', dest='format', help='alias for -f 22', const='22') + parser.add_option_group(video_format) + + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + parser.add_option_group(verbosity) + + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + 
action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TPL', help='output filename template') + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='F', help='file containing URLs to download') + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + parser.add_option_group(filesystem) + + (opts, args) = parser.parse_args() + + # Batch file verification + batchurls = [] + if opts.batchfile is not None: + try: + batchurls = open(opts.batchfile, 'r').readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + + # Conflicting, missing and erroneous options + if len(all_urls) < 1: + parser.error(u'you must provide at least one URL') + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error(u'using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error(u'account username missing') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): + parser.error(u'using output template conflicts with using title or literal title') + if opts.usetitle and opts.useliteral: + parser.error(u'using title conflicts with using literal title') + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass(u'Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error(u'invalid rate limit specified') + opts.ratelimit = numeric_limit + 
+ # Information extractors + youtube_ie = YoutubeIE() + metacafe_ie = MetacafeIE(youtube_ie) + youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_search_ie = YoutubeSearchIE(youtube_ie) + + # File downloader + fd = FileDownloader({ + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle), + 'format': opts.format, + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, + 'continuedl': opts.continue_dl, + }) + fd.add_info_extractor(youtube_search_ie) + fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(youtube_ie) + retcode = fd.download(all_urls) + sys.exit(retcode) + + except DownloadError: + sys.exit(1) + except SameFileError: + sys.exit(u'ERROR: fixed output name but more than one file to download') + except KeyboardInterrupt: + sys.exit(u'\nERROR: Interrupted by user') diff --git a/youtube-dl.spec b/youtube-dl.spec index 6fda444..2dbfd7c 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,13 +1,13 @@ Name: youtube-dl -Version: 2008.01.24 -Release: 3%{?dist} +Version: 2009.09.13 +Release: 2%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia -License: MIT -URL: http://www.arrakis.es/~rggi3/youtube-dl/ -Source0: http://www.arrakis.es/~rggi3/youtube-dl/youtube-dl -Source1: http://www.arrakis.es/~rggi3/youtube-dl/index.html +License: Public Domain +URL: http://bitbucket.org/rg3/youtube-dl +Source0: 
http://bitbucket.org/rg3/youtube-dl/raw/%{version}/youtube-dl +Source1: http://bitbucket.org/rg3/youtube-dl/wiki/Home BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildArch: noarch Requires: python >= 2.4 @@ -20,7 +20,7 @@ youtube-dl to mały tekstowy program służący do pobierania filmów z youtube.com. %prep -cp %{SOURCE1} . +install -p -m0644 %{SOURCE1} index.html %build #nothing to build @@ -39,6 +39,13 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Fri Oct 09 2009 Rafał Psota - 2009.09.13-2 +- Small fix in %%prep + +* Sun Sep 27 2009 Rafał Psota - 2009.09.13-1 +- Update to 2009.09.13 +- License change to Public Domain + * Mon Jul 27 2009 Fedora Release Engineering - 2008.01.24-3 - Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild From e2b3e964a4835be1ad7952f541af62193801445a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Psota?= Date: Fri, 9 Oct 2009 16:48:38 +0000 Subject: [PATCH 006/279] forgot about Home file --- Home | 501 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 Home diff --git a/Home b/Home new file mode 100644 index 0000000..25f453b --- /dev/null +++ b/Home @@ -0,0 +1,501 @@ + + + + + rg3 / youtube-dl / wiki / Home — bitbucket.org + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + +
    + +
    + +
    + + + +
    + +
    + + +
    +
    +
    + + + + +

    + rg3 / + youtube-dl + (http://bitbucket.org/rg3/youtube-dl/wiki/) + +

    + + + + + +

    youtube-dl is a small command-line program for downloading videos from YouTube.com.

    + +
    Clone this repository (size: 199.5 KB): HTTPS / SSH
    +
    $ hg clone http://bitbucket.org/rg3/youtube-dl/
    + +
    + +
    + +
    + + + + + + + +
    + +
    +
    + + + +
    +

    

    +

    youtube-dl: Download videos from YouTube.com

    +

    (and more...)

    +

    What is it?

    +

    youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2009.09.13. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

    +

    I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

    +

    Thanks for all the feedback received so far. I'm glad people find my program useful.

    +

    Usage instructions

    +

    In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

    +

    In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

    +

    After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

    +

    If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

    +

    More usage tips

    +
    • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. +
    • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". +
    • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. +
    • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. +
    • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. +
    • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. +
    • In a similar line, the -e or --get-title option tells the program to print the video title. +
    • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. +
    • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. +
    • The -b or --best-quality option can be used to download the highest available quality version of any given video. +
    • The -m or --mobile-version option is an alias for -f 17. +
    • The -d or --high-def option is an alias for -f 22. +
    • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. +
    • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. +
    • The program can be told not to overwrite existing files using the -w or --no-overwrites option. +
    • It can be told to attempt to continue interrupted downloads with the -c or --continue option. +
    • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. +
    • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". +
    • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. +
    • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. +
    • For usage instructions, use youtube-dl -h or youtube-dl --help. +
    • You can cancel the program at any time pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. +
    +

    Download it

    +

    Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

    +

    2009.09.13

    +
    • MD5: db4e6acf6876f5df4896724be0084980 +
    • SHA1: c76eca4489d625b76955d2eda76be19960bd9f3b +
    • SHA256: d6ca29457644d3fca8915f50470d50a2599100365bfdc94784868d7884a98afe +
    +

    Output template

    +

    The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

    +
    • id: The sequence will be replaced by the video identifier. +
    • url: The sequence will be replaced by the video URL. +
    • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. +
    • title: The sequence will be replaced by the literal video title. +
    • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. +
    • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). +
    • epoch: The sequence will be replaced by the Unix epoch when creating the file. +
    +

    As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

    +

    Authors

    +
    • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. +
    • Danny Colligan: YouTube search InfoExtractor, ideas and patches. +
    • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. +
    +

    Copyright © 2006-2009 Ricardo Garcia Gonzalez

    + + + +
    + + +
    +
    + +
    + + + + + From f24b41802c605fa7262a2ee66b51023d8a7afa12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Psota?= Date: Fri, 9 Oct 2009 16:53:36 +0000 Subject: [PATCH 007/279] update to 2009.09.13 --- Home | 501 +++++++++++++++ youtube-dl | 1573 ++++++++++++++++++++++++++++++++++++----------- youtube-dl.spec | 21 +- 3 files changed, 1722 insertions(+), 373 deletions(-) create mode 100644 Home diff --git a/Home b/Home new file mode 100644 index 0000000..25f453b --- /dev/null +++ b/Home @@ -0,0 +1,501 @@ + + + + + rg3 / youtube-dl / wiki / Home — bitbucket.org + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + +
    + +
    + +
    + + + +
    + +
    + + +
    +
    +
    + + + + +

    + rg3 / + youtube-dl + (http://bitbucket.org/rg3/youtube-dl/wiki/) + +

    + + + + + +

    youtube-dl is a small command-line program for downloading videos from YouTube.com.

    + +
    Clone this repository (size: 199.5 KB): HTTPS / SSH
    +
    $ hg clone http://bitbucket.org/rg3/youtube-dl/
    + +
    + +
    + +
    + + + + + + + +
    + +
    +
    + + + +
    +

    

    +

    youtube-dl: Download videos from YouTube.com

    +

    (and more...)

    +

    What is it?

    +

    youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2009.09.13. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

    +

    I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

    +

    Thanks for all the feedback received so far. I'm glad people find my program useful.

    +

    Usage instructions

    +

    In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

    +

    In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

    +

    After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

    +

    If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

    +

    More usage tips

    +
    • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. +
    • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". +
    • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. +
    • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. +
    • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. +
    • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. +
    • In a similar line, the -e or --get-title option tells the program to print the video title. +
    • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. +
    • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. +
    • The -b or --best-quality option can be used to download the highest available quality version of any given video. +
    • The -m or --mobile-version option is an alias for -f 17. +
    • The -d or --high-def option is an alias for -f 22. +
    • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. +
    • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. +
    • The program can be told not to overwrite existing files using the -w or --no-overwrites option. +
    • It can be told to attempt to continue interrupted downloads with the -c or --continue option. +
    • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. +
    • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". +
    • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. +
    • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. +
    • For usage instructions, use youtube-dl -h or youtube-dl --help. +
    • You can cancel the program at any time pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. +
    +

    Download it

    +

    Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

    +

    2009.09.13

    +
    • MD5: db4e6acf6876f5df4896724be0084980 +
    • SHA1: c76eca4489d625b76955d2eda76be19960bd9f3b +
    • SHA256: d6ca29457644d3fca8915f50470d50a2599100365bfdc94784868d7884a98afe +
    +

    Output template

    +

    The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

    +
    • id: The sequence will be replaced by the video identifier. +
    • url: The sequence will be replaced by the video URL. +
    • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. +
    • title: The sequence will be replaced by the literal video title. +
    • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. +
    • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). +
    • epoch: The sequence will be replaced by the Unix epoch when creating the file. +
    +

    As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

    +

    Authors

    +
    • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. +
    • Danny Colligan: YouTube search InfoExtractor, ideas and patches. +
    • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. +
    +

    Copyright © 2006-2009 Ricardo Garcia Gonzalez

    + + + +
    + + +
    +
    + +
    + + + + + diff --git a/youtube-dl b/youtube-dl index 1aa6123..5afff4e 100644 --- a/youtube-dl +++ b/youtube-dl @@ -1,387 +1,1228 @@ #!/usr/bin/env python -# -# Copyright (c) 2006-2008 Ricardo Garcia Gonzalez -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Except as contained in this notice, the name(s) of the above copyright -# holders shall not be used in advertising or otherwise to promote the -# sale, use or other dealings in this Software without prior written -# authorization. 
-# -import getpass +# -*- coding: utf-8 -*- +# Author: Ricardo Garcia Gonzalez +# Author: Danny Colligan +# License: Public domain code +import htmlentitydefs import httplib +import locale import math import netrc -import optparse import os +import os.path import re import socket import string import sys import time +import urllib import urllib2 -# Global constants -const_1k = 1024 -const_initial_block_size = 10 * const_1k -const_epsilon = 0.0001 -const_timeout = 120 - -const_video_url_str = 'http://www.youtube.com/watch?v=%s' -const_video_url_re = re.compile(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$') -const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' -const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' -const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' -const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' -const_url_t_param_re = re.compile(r', "t": "([^"]+)"') -const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s' -const_video_title_re = re.compile(r'YouTube - ([^<]*)', re.M | re.I) - -# Print error message, followed by standard advice information, and then exit -def error_advice_exit(error_text): - sys.stderr.write('Error: %s.\n' % error_text) - sys.stderr.write('Try again several times. 
It may be a temporary problem.\n') - sys.stderr.write('Other typical problems:\n\n') - sys.stderr.write('* Video no longer exists.\n') - sys.stderr.write('* Video requires age confirmation but you did not provide an account.\n') - sys.stderr.write('* You provided the account data, but it is not valid.\n') - sys.stderr.write('* The connection was cut suddenly for some reason.\n') - sys.stderr.write('* YouTube changed their system, and the program no longer works.\n') - sys.stderr.write('\nTry to confirm you are able to view the video using a web browser.\n') - sys.stderr.write('Use the same video URL and account information, if needed, with this program.\n') - sys.stderr.write('When using a proxy, make sure http_proxy has http://host:port format.\n') - sys.stderr.write('Try again several times and contact me if the problem persists.\n') - sys.exit('\n') - -# Wrapper to create custom requests with typical headers -def request_create(url, data=None): - retval = urllib2.Request(url) - if data is not None: - retval.add_data(data) - # Try to mimic Firefox, at least a little bit - retval.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') - retval.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') - retval.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') - retval.add_header('Accept-Language', 'en-us,en;q=0.5') - return retval - -# Perform a request, process headers and return response -def perform_request(url, data=None): - request = request_create(url, data) - response = urllib2.urlopen(request) - return response - -# Conditional print -def cond_print(str): - global cmdl_opts - if not (cmdl_opts.quiet or cmdl_opts.get_url): - sys.stdout.write(str) - sys.stdout.flush() - -# Title string normalization -def title_string_norm(title): - title = ''.join((x in string.ascii_letters or x in string.digits) and x or ' ' for x in 
title) - title = '_'.join(title.split()) - title = title.lower() - return title - -# Generic download step -def download_step(return_data_flag, step_title, step_error, url, post_data=None): - try: - cond_print('%s... ' % step_title) - data = perform_request(url, post_data).read() - cond_print('done.\n') - if return_data_flag: - return data - return None +std_headers = { + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept-Language': 'en-us,en;q=0.5', +} - except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error): - cond_print('failed.\n') - error_advice_exit(step_error) +simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') - except KeyboardInterrupt: - sys.exit('\n') +def preferredencoding(): + """Get preferred encoding. -# Generic extract step -def extract_step(step_title, step_error, regexp, data): + Returns the best encoding scheme for the system, based on + locale.getpreferredencoding() and some further tweaks. + """ try: - cond_print('%s... ' % step_title) - match = regexp.search(data) - - if match is None: - cond_print('failed.\n') - error_advice_exit(step_error) - - extracted_data = match.group(1) - cond_print('done.\n') - return extracted_data + pref = locale.getpreferredencoding() + # Mac OSX systems have this problem sometimes + if pref == '': + return 'UTF-8' + return pref + except: + sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n') + return 'UTF-8' + +class DownloadError(Exception): + """Download Error exception. 
- except KeyboardInterrupt: - sys.exit('\n') - -# Calculate new block size based on previous block size -def new_block_size(before, after, bytes): - new_min = max(bytes / 2.0, 1.0) - new_max = max(bytes * 2.0, 1.0) - dif = after - before - if dif < const_epsilon: - return int(new_max) - rate = bytes / dif - if rate > new_max: - return int(new_max) - if rate < new_min: - return int(new_min) - return int(rate) - -# Get optimum 1k exponent to represent a number of bytes -def optimum_k_exp(num_bytes): - global const_1k - if num_bytes == 0: - return 0 - return long(math.log(num_bytes, const_1k)) - -# Get optimum representation of number of bytes -def format_bytes(num_bytes): - global const_1k - try: - exp = optimum_k_exp(num_bytes) - suffix = 'bkMGTPEZY'[exp] - if exp == 0: - return '%s%s' % (num_bytes, suffix) - converted = float(num_bytes) / float(const_1k**exp) + This exception may be thrown by FileDownloader objects if they are not + configured to continue on errors. They will contain the appropriate + error message. + """ + pass + +class SameFileError(Exception): + """Same File exception. + + This exception will be thrown by FileDownloader objects if they detect + multiple files would have to be downloaded to the same file on disk. + """ + pass + +class PostProcessingError(Exception): + """Post Processing exception. + + This exception may be raised by PostProcessor's .run() method to + indicate an error in the postprocessing task. + """ + pass + +class UnavailableFormatError(Exception): + """Unavailable Format exception. + + This exception will be thrown when a video is requested + in a format that is not available for that video. + """ + pass + +class ContentTooShortError(Exception): + """Content Too Short exception. + + This exception may be raised by FileDownloader objects when a file they + download is too small for what the server announced first, indicating + the connection was probably interrupted. 
+ """ + # Both in bytes + downloaded = None + expected = None + + def __init__(self, downloaded, expected): + self.downloaded = downloaded + self.expected = expected + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk if the user has requested + it, among some other tasks. In most cases there should be one per + program. As, given a video URL, the downloader doesn't know how to + extract all the needed information, task that InfoExtractors do, it + has to pass the URL to one of them. + + For this, file downloader objects have a method that allows + InfoExtractors to be registered in a given order. When it is passed + a URL, the file downloader handles it to the first InfoExtractor it + finds that reports being able to handle it. The InfoExtractor extracts + all the information about the video or videos the URL refers to, and + asks the FileDownloader to process the video information, possibly + downloading the video. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. These options are available through the params + attribute for the InfoExtractors to use. The FileDownloader also + registers itself as the downloader in charge for the InfoExtractors + that are added to it, so this is a "mutual registration". + + Available options: + + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + simulate: Do not download the video files. + format: Video format code. + outtmpl: Template for output names. + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. 
+ nooverwrites: Prevent overwriting files. + continuedl: Try to continue downloads if possible. + """ + + params = None + _ies = [] + _pps = [] + _download_retcode = None + + def __init__(self, params): + """Create a FileDownloader object with the given options.""" + self._ies = [] + self._pps = [] + self._download_retcode = 0 + self.params = params + + @staticmethod + def pmkdir(filename): + """Create directory components in filename. Similar to Unix "mkdir -p".""" + components = filename.split(os.sep) + aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] + aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator + for dir in aggregate: + if not os.path.exists(dir): + os.mkdir(dir) + + @staticmethod + def format_bytes(bytes): + if bytes is None: + return 'N/A' + if type(bytes) is str: + bytes = float(bytes) + if bytes == 0.0: + exponent = 0 + else: + exponent = long(math.log(bytes, 1024.0)) + suffix = 'bkMGTPEZY'[exponent] + converted = float(bytes) / float(1024**exponent) return '%.2f%s' % (converted, suffix) - except IndexError: - sys.exit('Error: internal error formatting number of bytes.') - -# Calculate ETA and return it in string format as MM:SS -def calc_eta(start, now, total, current): - dif = now - start - if current == 0 or dif < const_epsilon: - return '--:--' - rate = float(current) / dif - eta = long((total - current) / rate) - (eta_mins, eta_secs) = divmod(eta, 60) - if eta_mins > 99: - return '--:--' - return '%02d:%02d' % (eta_mins, eta_secs) - -# Calculate speed and return it in string format -def calc_speed(start, now, bytes): - dif = now - start - if bytes == 0 or dif < const_epsilon: - return 'N/A b' - return format_bytes(float(bytes) / dif) - - -# Title string minimal transformation -def title_string_touch(title): - return title.replace(os.sep, '%') - -# Create the command line options parser and parse command line -cmdl_usage = 'usage: %prog [options] video_url' -cmdl_version = 
'2008.01.24' -cmdl_parser = optparse.OptionParser(usage=cmdl_usage, version=cmdl_version, conflict_handler='resolve') -cmdl_parser.add_option('-h', '--help', action='help', help='print this help text and exit') -cmdl_parser.add_option('-v', '--version', action='version', help='print program version and exit') -cmdl_parser.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username') -cmdl_parser.add_option('-p', '--password', dest='password', metavar='PASSWORD', help='account password') -cmdl_parser.add_option('-o', '--output', dest='outfile', metavar='FILE', help='output video file name') -cmdl_parser.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode') -cmdl_parser.add_option('-s', '--simulate', action='store_true', dest='simulate', help='do not download video') -cmdl_parser.add_option('-t', '--title', action='store_true', dest='use_title', help='use title in file name') -cmdl_parser.add_option('-l', '--literal', action='store_true', dest='use_literal', help='use literal title in file name') -cmdl_parser.add_option('-n', '--netrc', action='store_true', dest='use_netrc', help='use .netrc authentication data') -cmdl_parser.add_option('-g', '--get-url', action='store_true', dest='get_url', help='print final video URL only') -cmdl_parser.add_option('-2', '--title-too', action='store_true', dest='get_title', help='used with -g, print title too') -(cmdl_opts, cmdl_args) = cmdl_parser.parse_args() - -# Set socket timeout -socket.setdefaulttimeout(const_timeout) - -# Get video URL -if len(cmdl_args) != 1: - cmdl_parser.print_help() - sys.exit('\n') -video_url_cmdl = cmdl_args[0] - -# Verify video URL format and convert to "standard" format -video_url_mo = const_video_url_re.match(video_url_cmdl) -if video_url_mo is None: - sys.exit('Error: URL does not seem to be a youtube video URL. 
If it is, report a bug.') -video_url_id = video_url_mo.group(2) -video_url = const_video_url_str % video_url_id - -# Check conflicting options -if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url): - sys.stderr.write('Warning: video file name given but will not be used.\n') - -if cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal): - sys.exit('Error: using the video title conflicts with using a given file name.') - -if cmdl_opts.use_title and cmdl_opts.use_literal: - sys.exit('Error: cannot use title and literal title at the same time.') - -if cmdl_opts.quiet and cmdl_opts.get_url: - sys.exit('Error: cannot be quiet and print final URL at the same time.') - -# Incorrect option formatting -if cmdl_opts.username is None and cmdl_opts.password is not None: - sys.exit('Error: password give but username is missing.') - -if cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None): - sys.exit('Error: cannot use netrc and username/password at the same time.') - -if cmdl_opts.get_url is None and cmdl_opts.get_title is not None: - sys.exit('Error: getting title requires getting URL.') - -# Get account information if any -account_username = None -account_password = None - -if cmdl_opts.use_netrc: - try: - info = netrc.netrc().authenticators('youtube') - if info is None: - sys.exit('Error: no authenticators for machine youtube.') - account_username = info[0] - account_password = info[2] - except IOError: - sys.exit('Error: unable to read .netrc file.') - except netrc.NetrcParseError: - sys.exit('Error: unable to parse .netrc file.') -else: - account_username = cmdl_opts.username - if account_username is not None: - if cmdl_opts.password is None: - account_password = getpass.getpass('Type YouTube password and press return: ') + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return '---.-%' + return '%6s' % ('%3.1f%%' % (float(byte_counter) / 
float(data_len) * 100.0)) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return '--:--' + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return '--:--' + rate = float(current) / dif + eta = long((float(total) - float(current)) / rate) + (eta_mins, eta_secs) = divmod(eta, 60) + if eta_mins > 99: + return '--:--' + return '%02d:%02d' % (eta_mins, eta_secs) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return '%10s' % '---b/s' + return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return long(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return long(new_max) + if rate < new_min: + return long(new_min) + return long(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into a long integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return long(round(number * multiplier)) + + @staticmethod + def verify_url(url): + """Verify a URL is valid and data could be downloaded. 
Return real data URL.""" + request = urllib2.Request(url, None, std_headers) + data = urllib2.urlopen(request) + data.read(1) + url = data.geturl() + data.close() + return url + + def add_info_extractor(self, ie): + """Add an InfoExtractor object to the end of the list.""" + self._ies.append(ie) + ie.set_downloader(self) + + def add_post_processor(self, pp): + """Add a PostProcessor object to the end of the chain.""" + self._pps.append(pp) + pp.set_downloader(self) + + def to_stdout(self, message, skip_eol=False): + """Print message to stdout if not in quiet mode.""" + if not self.params.get('quiet', False): + print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), + sys.stdout.flush() + + def to_stderr(self, message): + """Print message to stderr.""" + print >>sys.stderr, message.encode(preferredencoding()) + + def fixed_template(self): + """Checks if the output template is fixed.""" + return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) + + def trouble(self, message=None): + """Determine action to take when a download problem appears. + + Depending on if the downloader has been configured to ignore + download errors or not, this method may throw an exception or + not when errors are found, after printing the message. 
+ """ + if message is not None: + self.to_stderr(message) + if not self.params.get('ignoreerrors', False): + raise DownloadError(message) + self._download_retcode = 1 + + def slow_down(self, start_time, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit', None) + if rate_limit is None or byte_counter == 0: + return + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + + def report_destination(self, filename): + """Report destination filename.""" + self.to_stdout(u'[download] Destination: %s' % filename) + + def report_progress(self, percent_str, data_len_str, speed_str, eta_str): + """Report download progress.""" + self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + + def report_resuming_byte(self, resume_len): + """Report attemtp to resume at given byte.""" + self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + self.to_stdout(u'[download] %s has already been downloaded' % file_name) + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_stdout(u'[download] Unable to resume') + + def report_finish(self): + """Report download finished.""" + self.to_stdout(u'') + + def process_info(self, info_dict): + """Process a single dictionary returned by an InfoExtractor.""" + # Do nothing else if in simulate mode + if self.params.get('simulate', False): + try: + info_dict['url'] = self.verify_url(info_dict['url']) + except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: + raise UnavailableFormatError + + # Forced printings + if self.params.get('forcetitle', False): + 
print info_dict['title'].encode(preferredencoding()) + if self.params.get('forceurl', False): + print info_dict['url'].encode(preferredencoding()) + + return + + try: + template_dict = dict(info_dict) + template_dict['epoch'] = unicode(long(time.time())) + filename = self.params['outtmpl'] % template_dict + except (ValueError, KeyError), err: + self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + if self.params['nooverwrites'] and os.path.exists(filename): + self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) + return + + try: + self.pmkdir(filename) + except (OSError, IOError), err: + self.trouble('ERROR: unable to create directories: %s' % str(err)) + return + + try: + success = self._do_download(filename, info_dict['url']) + except (OSError, IOError), err: + raise UnavailableFormatError + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.trouble('ERROR: unable to download video data: %s' % str(err)) + return + except (ContentTooShortError, ), err: + self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return + + if success: + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + self.trouble('ERROR: postprocessing: %s' % str(err)) + return + + def download(self, url_list): + """Download a given list of URLs.""" + if len(url_list) > 1 and self.fixed_template(): + raise SameFileError(self.params['outtmpl']) + + for url in url_list: + suitable_found = False + for ie in self._ies: + # Go to next InfoExtractor if not suitable + if not ie.suitable(url): + continue + + # Suitable InfoExtractor found + suitable_found = True + + # Extract information from URL and process it + ie.extract(url) + + # Suitable InfoExtractor had been found; go to next URL + break + + if not suitable_found: + self.trouble('ERROR: no suitable InfoExtractor: %s' % url) + + return self._download_retcode + + def post_process(self, filename, 
ie_info): + """Run the postprocessing chain on the given file.""" + info = dict(ie_info) + info['filepath'] = filename + for pp in self._pps: + info = pp.run(info) + if info is None: + break + + def _do_download(self, filename, url): + stream = None + open_mode = 'ab' + + basic_request = urllib2.Request(url, None, std_headers) + request = urllib2.Request(url, None, std_headers) + + # Attempt to resume download with "continuedl" option + if os.path.isfile(filename): + resume_len = os.path.getsize(filename) else: - account_password = cmdl_opts.password - -# Get output file name -if cmdl_opts.outfile is None: - video_filename = '%s.flv' % video_url_id -else: - video_filename = cmdl_opts.outfile - -# Install cookie and proxy handlers -urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) -urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) - -# Log in and confirm age if needed -if account_username is not None: - url = const_login_url_str % video_url_id - post = const_login_post_str % (video_url_id, account_username, account_password) - download_step(False, 'Logging in', 'unable to log in', url, post) - - url = const_age_url_str % video_url_id - post = const_age_post_str % video_url_id - download_step(False, 'Confirming age', 'unable to confirm age', url, post) - -# Retrieve video webpage -video_webpage = download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url) - -# Extract video title if needed -if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title: - video_title = extract_step('Extracting video title', 'unable to extract video title', const_video_title_re, video_webpage) - -# Extract needed video URL parameters -video_url_t_param = extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re, video_webpage) -video_url_real = const_video_url_real_str % (video_url_id, video_url_t_param) - -# Rebuild filename if needed -if 
cmdl_opts.use_title or cmdl_opts.use_literal: - if cmdl_opts.use_title: - prefix = title_string_norm(video_title) - else: - prefix = title_string_touch(video_title) - video_filename = '%s-%s.flv' % (prefix, video_url_id) - -# Check name -if not video_filename.lower().endswith('.flv'): - sys.stderr.write('Warning: video file name does not end in .flv\n') - -# Retrieve video data -try: - cond_print('Requesting video file... ') - video_data = perform_request(video_url_real) - cond_print('done.\n') - cond_print('Video data found at %s\n' % video_data.geturl()) - - if cmdl_opts.get_title: - print video_title - - if cmdl_opts.get_url: - print video_data.geturl() - - if cmdl_opts.simulate or cmdl_opts.get_url: - sys.exit() + resume_len = 0 + if self.params['continuedl'] and resume_len != 0: + self.report_resuming_byte(resume_len) + request.add_header('Range','bytes=%d-' % resume_len) - try: - video_file = open(video_filename, 'wb') - except (IOError, OSError): - sys.exit('Error: unable to open "%s" for writing.' 
% video_filename) - try: - video_len = long(video_data.info()['Content-length']) - video_len_str = format_bytes(video_len) - except KeyError: - video_len = None - video_len_str = 'N/A' - - byte_counter = 0 - block_size = const_initial_block_size - start_time = time.time() - while True: - if video_len is not None: - percent = float(byte_counter) / float(video_len) * 100.0 - percent_str = '%.1f' % percent - eta_str = calc_eta(start_time, time.time(), video_len, byte_counter) + # Establish connection + try: + data = urllib2.urlopen(request) + except (urllib2.HTTPError, ), err: + if err.code != 416: # 416 is 'Requested range not satisfiable' + raise + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + if content_length is not None and long(content_length) == resume_len: + self.report_file_already_downloaded(filename) + return True + else: + self.report_unable_to_resume() + open_mode = 'wb' + + data_len = data.info().get('Content-length', None) + data_len_str = self.format_bytes(data_len) + byte_counter = 0 + block_size = 1024 + start = time.time() + while True: + # Download and write + before = time.time() + data_block = data.read(block_size) + after = time.time() + data_block_len = len(data_block) + if data_block_len == 0: + break + byte_counter += data_block_len + + # Open file just in time + if stream is None: + try: + stream = open(filename, open_mode) + self.report_destination(filename) + except (OSError, IOError), err: + self.trouble('ERROR: unable to open for writing: %s' % str(err)) + return False + stream.write(data_block) + block_size = self.best_block_size(after - before, data_block_len) + + # Progress message + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) + speed_str = self.calc_speed(start, time.time(), byte_counter) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) + + # Apply rate limit + self.slow_down(start, 
byte_counter) + + self.report_finish() + if data_len is not None and str(byte_counter) != data_len: + raise ContentTooShortError(byte_counter, long(data_len)) + return True + +class InfoExtractor(object): + """Information Extractor class. + + Information extractors are the classes that, given a URL, extract + information from the video (or videos) the URL refers to. This + information includes the real video URL, the video title and simplified + title, author and others. The information is stored in a dictionary + which is then passed to the FileDownloader. The FileDownloader + processes this information possibly downloading the video to the file + system, among other possible outcomes. The dictionaries must include + the following fields: + + id: Video identifier. + url: Final video URL. + uploader: Nickname of the video uploader. + title: Literal title. + stitle: Simplified title. + ext: Video filename extension. + + Subclasses of this one should re-define the _real_initialize() and + _real_extract() methods, as well as the suitable() static method. + Probably, they should also be instantiated and added to the main + downloader. + """ + + _ready = False + _downloader = None + + def __init__(self, downloader=None): + """Constructor. Receives an optional downloader.""" + self._ready = False + self.set_downloader(downloader) + + @staticmethod + def suitable(url): + """Receives a URL and returns True if suitable for this IE.""" + return False + + def initialize(self): + """Initializes an instance (authentication, etc).""" + if not self._ready: + self._real_initialize() + self._ready = True + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + self.initialize() + return self._real_extract(url) + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def _real_initialize(self): + """Real initialization process. 
Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. Redefine in subclasses.""" + pass + +class YoutubeIE(InfoExtractor): + """Information extractor for youtube.com.""" + + _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' + _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' + _NETRC_MACHINE = 'youtube' + _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag + _video_extensions = { + '13': '3gp', + '17': 'mp4', + '18': 'mp4', + '22': 'mp4', + } + + @staticmethod + def suitable(url): + return (re.match(YoutubeIE._VALID_URL, url) is not None) + + @staticmethod + def htmlentity_transform(matchobj): + """Transforms an HTML entity to a Unicode character.""" + entity = matchobj.group(1) + + # Known non-numeric HTML entity + if entity in htmlentitydefs.name2codepoint: + return unichr(htmlentitydefs.name2codepoint[entity]) + + # Unicode character + mobj = re.match(ur'(?u)#(x?\d+)', entity) + if mobj is not None: + numstr = mobj.group(1) + if numstr.startswith(u'x'): + base = 16 + numstr = u'0%s' % numstr + else: + base = 10 + return unichr(long(numstr, base)) + + # Unknown entity in name, return its literal representation + return (u'&%s;' % entity) + + def report_lang(self): + """Report attempt to set language.""" + self._downloader.to_stdout(u'[youtube] Setting language') + + def report_login(self): + """Report attempt to log in.""" + self._downloader.to_stdout(u'[youtube] Logging in') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self._downloader.to_stdout(u'[youtube] Confirming age') + + def report_video_info_webpage_download(self, video_id): + """Report attempt to download video info 
webpage.""" + self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) + + def report_information_extraction(self, video_id): + """Report attempt to extract video information.""" + self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) + + def report_unavailable_format(self, video_id, format): + """Report extracted video URL.""" + self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) + + def _real_initialize(self): + if self._downloader is None: + return + + username = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError), err: + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + return + + # Set language + request = urllib2.Request(self._LANG_URL, None, std_headers) + try: + self.report_lang() + urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + return + + # No authentication to be performed + if username is None: + return + + # Log in + login_form = { + 'current_form': 'loginForm', + 'next': '/', + 'action_login': 'Log In', + 'username': username, + 'password': password, + } + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) + try: + self.report_login() + login_results = urllib2.urlopen(request).read() + if re.search(r'(?i)]* name="loginForm"', 
login_results) is not None: + self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') + return + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + return + + # Confirm age + age_form = { + 'next_url': '/', + 'action_confirm': 'Confirm', + } + request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) + try: + self.report_age_confirmation() + age_results = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract video id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group(2) + + # Downloader parameters + best_quality = False + format_param = None + quality_index = 0 + if self._downloader is not None: + params = self._downloader.params + format_param = params.get('format', None) + if format_param == '0': + format_param = self._available_formats[quality_index] + best_quality = True + + while True: + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + + # Get video info + video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id + request = urllib2.Request(video_info_url, None, std_headers) + try: + self.report_video_info_webpage_download(video_id) + video_info_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + self.report_information_extraction(video_id) + + # "t" param + mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + # Attempt to see if YouTube has 
issued an error message + mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') + stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') + stream.write(video_info_webpage) + stream.close() + else: + reason = urllib.unquote_plus(mobj.group(1)) + self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) + return + token = urllib.unquote(mobj.group(1)) + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) + if format_param is not None: + video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + + # uploader + mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = urllib.unquote(mobj.group(1)) + + # title + mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote(mobj.group(1)) + video_title = video_title.decode('utf-8') + video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) + video_title = video_title.replace(os.sep, u'%') + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_real_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + }) + + return + + except UnavailableFormatError, err: + if best_quality: + if quality_index == len(self._available_formats) - 1: + # I don't ever expect this to happen + self._downloader.trouble(u'ERROR: 
no known formats available for video') + return + else: + self.report_unavailable_format(video_id, format_param) + quality_index += 1 + format_param = self._available_formats[quality_index] + continue + else: + self._downloader.trouble('ERROR: format not available for video') + return + + +class MetacafeIE(InfoExtractor): + """Information Extractor for metacafe.com.""" + + _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' + _DISCLAIMER = 'http://www.metacafe.com/family_filter/' + _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(MetacafeIE._VALID_URL, url) is not None) + + def report_disclaimer(self): + """Report disclaimer retrieval.""" + self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self._downloader.to_stdout(u'[metacafe] Confirming age') + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id) + + def _real_initialize(self): + # Retrieve disclaimer + request = urllib2.Request(self._DISCLAIMER, None, std_headers) + try: + self.report_disclaimer() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + return + + # Confirm age + disclaimer_form = { + 'filters': '0', + 'submit': "Continue - I'm over 18", + } + request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), 
std_headers) + try: + self.report_age_confirmation() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + # Check if video comes from YouTube + mobj2 = re.match(r'^yt-(.*)$', video_id) + if mobj2 is not None: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) + return + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + #if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract gdaKey') + # return + #gdaKey = mobj.group(1) + # + #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) + + video_url = mediaURL + + mobj = re.search(r'(?im)(.*) - Video', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + + mobj = re.search(r'(?ms)
  • .*?Submitter:.*?(.*?)<', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class YoutubeSearchIE(InfoExtractor): + """Information Extractor for YouTube search queries.""" + _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' + _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' + _youtube_ie = None + _max_youtube_results = 1000 + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + return + + prefix, query = query.split(':') + prefix = prefix[8:] + if prefix == '': + self._download_n_results(query, 1) + return + elif prefix == 'all': + self._download_n_results(query, self._max_youtube_results) + return else: - percent_str = '---.-' - eta_str = '--:--' - counter = format_bytes(byte_counter) - speed_str = calc_speed(start_time, time.time(), byte_counter) 
- cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str)) - - before = time.time() - video_block = video_data.read(block_size) - after = time.time() - dl_bytes = len(video_block) - if dl_bytes == 0: - break - byte_counter += dl_bytes - video_file.write(video_block) - block_size = new_block_size(before, after, dl_bytes) - - if video_len is not None and byte_counter != video_len: - error_advice_exit('server did not send the expected ammount of data') - - video_file.close() - cond_print('done.\n') - cond_print('Video data saved to %s\n' % video_filename) - -except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error): - cond_print('failed.\n') - error_advice_exit('unable to download video data') - -except KeyboardInterrupt: - sys.exit('\n') - -# Finish -sys.exit() + try: + n = long(prefix) + if n <= 0: + self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return + elif n > self._max_youtube_results: + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + n = self._max_youtube_results + self._download_n_results(query, n) + return + except ValueError: # parsing prefix as integer fails + self._download_n_results(query, 1) + return + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, 
page): + video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + + if re.search(self._MORE_PAGES_INDICATOR, page) is None: + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + + pagenum = pagenum + 1 + +class YoutubePlaylistIE(InfoExtractor): + """Information Extractor for YouTube playlists.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' + _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' + _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) + + def report_download_page(self, playlist_id, pagenum): + """Report attempt to download playlist page with given number.""" + self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract playlist id + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid url: %s' % url) + return + + # Download playlist pages + playlist_id = mobj.group(1) + video_ids = [] + pagenum = 1 + + while True: + self.report_download_page(playlist_id, pagenum) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + 
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) + + if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: + break + pagenum = pagenum + 1 + + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + +class PostProcessor(object): + """Post Processor class. + + PostProcessor objects can be added to downloaders with their + add_post_processor() method. When the downloader has finished a + successful download, it will take its internal chain of PostProcessors + and start calling the run() method on each one of them, first with + an initial argument and then with the returned value of the previous + PostProcessor. + + The chain will be stopped if one of them ever returns None or the end + of the chain is reached. + + PostProcessor objects follow a "mutual registration" process similar + to InfoExtractor objects. + """ + + _downloader = None + + def __init__(self, downloader=None): + self._downloader = downloader + + def set_downloader(self, downloader): + """Sets the downloader for this PP.""" + self._downloader = downloader + + def run(self, information): + """Run the PostProcessor. + + The "information" argument is a dictionary like the ones + composed by InfoExtractors. The only difference is that this + one has an extra field called "filepath" that points to the + downloaded file. + + When this method returns None, the postprocessing chain is + stopped. However, this method may return an information + dictionary that will be passed to the next postprocessing + object in the chain. It can be the one it received after + changing some fields. 
+ + In addition, this method may raise a PostProcessingError + exception that will be taken into account by the downloader + it was called from. + """ + return information # by default, do nothing + +### MAIN PROGRAM ### +if __name__ == '__main__': + try: + # Modules needed only when running the main program + import getpass + import optparse + + # General configuration + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) + urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + # Parse command line + parser = optparse.OptionParser( + usage='Usage: %prog [options] url...', + version='2009.09.13', + conflict_handler='resolve', + ) + + parser.add_option('-h', '--help', + action='help', help='print this help text and exit') + parser.add_option('-v', '--version', + action='version', help='print program version and exit') + parser.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + parser.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='L', help='download rate limit (e.g. 
50k or 44.6m)') + + authentication = optparse.OptionGroup(parser, 'Authentication Options') + authentication.add_option('-u', '--username', + dest='username', metavar='UN', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PW', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + parser.add_option_group(authentication) + + video_format = optparse.OptionGroup(parser, 'Video Format Options') + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FMT', help='video format code') + video_format.add_option('-b', '--best-quality', + action='store_const', dest='format', help='download the best quality video possible', const='0') + video_format.add_option('-m', '--mobile-version', + action='store_const', dest='format', help='alias for -f 17', const='17') + video_format.add_option('-d', '--high-def', + action='store_const', dest='format', help='alias for -f 22', const='22') + parser.add_option_group(video_format) + + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + parser.add_option_group(verbosity) + + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + 
action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TPL', help='output filename template') + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='F', help='file containing URLs to download') + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + parser.add_option_group(filesystem) + + (opts, args) = parser.parse_args() + + # Batch file verification + batchurls = [] + if opts.batchfile is not None: + try: + batchurls = open(opts.batchfile, 'r').readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + + # Conflicting, missing and erroneous options + if len(all_urls) < 1: + parser.error(u'you must provide at least one URL') + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error(u'using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error(u'account username missing') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): + parser.error(u'using output template conflicts with using title or literal title') + if opts.usetitle and opts.useliteral: + parser.error(u'using title conflicts with using literal title') + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass(u'Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error(u'invalid rate limit specified') + opts.ratelimit = numeric_limit + 
+ # Information extractors + youtube_ie = YoutubeIE() + metacafe_ie = MetacafeIE(youtube_ie) + youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_search_ie = YoutubeSearchIE(youtube_ie) + + # File downloader + fd = FileDownloader({ + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle), + 'format': opts.format, + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, + 'continuedl': opts.continue_dl, + }) + fd.add_info_extractor(youtube_search_ie) + fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(youtube_ie) + retcode = fd.download(all_urls) + sys.exit(retcode) + + except DownloadError: + sys.exit(1) + except SameFileError: + sys.exit(u'ERROR: fixed output name but more than one file to download') + except KeyboardInterrupt: + sys.exit(u'\nERROR: Interrupted by user') diff --git a/youtube-dl.spec b/youtube-dl.spec index 6fda444..2dbfd7c 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,13 +1,13 @@ Name: youtube-dl -Version: 2008.01.24 -Release: 3%{?dist} +Version: 2009.09.13 +Release: 2%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia -License: MIT -URL: http://www.arrakis.es/~rggi3/youtube-dl/ -Source0: http://www.arrakis.es/~rggi3/youtube-dl/youtube-dl -Source1: http://www.arrakis.es/~rggi3/youtube-dl/index.html +License: Public Domain +URL: http://bitbucket.org/rg3/youtube-dl +Source0: 
http://bitbucket.org/rg3/youtube-dl/raw/%{version}/youtube-dl +Source1: http://bitbucket.org/rg3/youtube-dl/wiki/Home BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildArch: noarch Requires: python >= 2.4 @@ -20,7 +20,7 @@ youtube-dl to mały tekstowy program służący do pobierania filmów z youtube.com. %prep -cp %{SOURCE1} . +install -p -m0644 %{SOURCE1} index.html %build #nothing to build @@ -39,6 +39,13 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Fri Oct 09 2009 Rafał Psota - 2009.09.13-2 +- Small fix in %%prep + +* Sun Sep 27 2009 Rafał Psota - 2009.09.13-1 +- Update to 2009.09.13 +- License change to Public Domain + * Mon Jul 27 2009 Fedora Release Engineering - 2008.01.24-3 - Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild From baa79921375b42b55655480c88f0199ae7125e2a Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 25 Nov 2009 22:39:35 +0000 Subject: [PATCH 008/279] Fix typo that causes a failure to update the common directory. 
(releng #2781) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ac11e79..e163bdb 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ # Makefile for source rpm: youtube-dl -# $Id$ +# $Id: Makefile,v 1.1 2008/01/25 19:47:50 kevin Exp $ NAME := youtube-dl SPECFILE = $(firstword $(wildcard *.spec)) define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done +for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done endef MAKEFILE_COMMON := $(shell $(find-makefile-common)) From 092df3c478a5beaa9ac0c525cdb5fa4b61d27ecf Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Thu, 26 Nov 2009 01:12:28 +0000 Subject: [PATCH 009/279] Fix typo that causes a failure to update the common directory. 
(releng #2781) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ac11e79..e163bdb 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ # Makefile for source rpm: youtube-dl -# $Id$ +# $Id: Makefile,v 1.1 2008/01/25 19:47:50 kevin Exp $ NAME := youtube-dl SPECFILE = $(firstword $(wildcard *.spec)) define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done +for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done endef MAKEFILE_COMMON := $(shell $(find-makefile-common)) From 65b5d97a7de5ff869c63caaa901bf458af8d2fb1 Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Wed, 17 Feb 2010 03:34:46 +0000 Subject: [PATCH 010/279] Initialize branch F-13 for youtube-dl --- branch | 1 + 1 file changed, 1 insertion(+) create mode 100644 branch diff --git a/branch b/branch new file mode 100644 index 0000000..baa94ef --- /dev/null +++ b/branch @@ -0,0 +1 @@ +F-13 From 6f9ab6ce2d00988571b27175a1990397a0b4755b Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 29 Apr 2010 09:27:05 +0000 Subject: [PATCH 011/279] update to new release to fix download issues for some videos --- Home | 208 ++++++++------ youtube-dl | 791 +++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 823 insertions(+), 176 deletions(-) diff --git a/Home b/Home index 25f453b..1be19d9 100644 --- a/Home +++ b/Home @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
  • .*?Submitter:.*?(.*?)<', webpage) + mobj = re.search(r'(?ms)By:\s*(.+?)<', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -882,6 +1008,396 @@ class MetacafeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class GoogleIE(InfoExtractor): + """Information extractor for video.google.com.""" + + _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(GoogleIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'mp4' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = 
re.search(r"download_url:'([^']+)'", webpage) + if mobj is None: + video_extension = 'flv' + mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = mediaURL.replace('\\x3d', '\x3d') + mediaURL = mediaURL.replace('\\x26', '\x26') + + video_url = mediaURL + + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class PhotobucketIE(InfoExtractor): + """Information extractor for photobucket.com.""" + + _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(PhotobucketIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: 
%s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + video_url = mediaURL + + mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + video_uploader = mobj.group(2).decode('utf-8') + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class YahooIE(InfoExtractor): + """Information extractor for video.yahoo.com.""" + + # _VALID_URL matches all Yahoo! Video URLs + # _VPAGE_URL matches only the extractable '/watch/' URLs + _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' + _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
+ + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(YahooIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(2) + video_extension = 'flv' + + # Rewrite valid but non-extractable URLs as + # extractable English language /watch/ URLs + if re.match(self._VPAGE_URL, url) is None: + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract id field') + return + yahoo_id = mobj.group(1) + + mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract vid field') + return + yahoo_vid = mobj.group(1) + + url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) + return self._real_extract(url) + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # 
Extract uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + mobj = re.search(r'

    (.*)

    ', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video height and width + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video height') + return + yv_video_height = mobj.group(1) + + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video width') + return + yv_video_width = mobj.group(1) + + # Retrieve video playlist to extract media URL + # I'm not completely sure what all these options are, but we + # seem to need most of them, otherwise the server sends a 401. + yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents + yv_bitrate = '700' # according to Wikipedia this is hard-coded + request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract media URL from playlist XML + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + # video uploader is domain name + mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_uploader = mobj.group(1).decode('utf-8') + + try: + # 
Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -906,6 +1422,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): @@ -919,6 +1436,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -979,10 +1497,10 @@ class YoutubeSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' - _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -1028,7 +1546,7 @@ class YoutubePlaylistIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: + if re.search(self._MORE_PAGES_INDICATOR, page) is None: break pagenum = pagenum + 1 @@ -1036,6 +1554,61 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
return +class YoutubeUserIE(InfoExtractor): + """Information Extractor for YouTube users.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' + _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' + _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeUserIE._VALID_URL, url) is not None) + + def report_download_page(self, username): + """Report attempt to download user page.""" + self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract username + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid url: %s' % url) + return + + # Download user page + username = mobj.group(1) + video_ids = [] + pagenum = 1 + + self.report_download_page(username) + request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) + + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + class PostProcessor(object): """Post Processor class. 
@@ -1089,6 +1662,22 @@ if __name__ == '__main__': import getpass import optparse + # Function to update the program file with the latest version from bitbucket.org + def update_self(downloader, filename): + # Note: downloader only used for options + if not os.access (filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_stdout('Updating to latest stable version...') + latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + downloader.to_stdout('Updated to version %s' % latest_version) + # General configuration urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) @@ -1097,7 +1686,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.09.13', + version='2010.04.04', conflict_handler='resolve', ) @@ -1105,6 +1694,8 @@ if __name__ == '__main__': action='help', help='print this help text and exit') parser.add_option('-v', '--version', action='version', help='print program version and exit') + parser.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest stable version') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', @@ -1128,6 +1719,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('-d', '--high-def', action='store_const', dest='format', help='alias for -f 22', const='22') + video_format.add_option('--all-formats', 
+ action='store_const', dest='format', help='download all available video formats', const='-1') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -1139,6 +1732,8 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -1157,7 +1752,7 @@ if __name__ == '__main__': parser.add_option_group(filesystem) (opts, args) = parser.parse_args() - + # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -1170,8 +1765,6 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options - if len(all_urls) < 1: - parser.error(u'you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -1192,7 +1785,12 @@ if __name__ == '__main__': youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) + google_ie = GoogleIE() + photobucket_ie = PhotobucketIE() + yahoo_ie = YahooIE() + generic_ie = GenericIE() # File downloader fd = FileDownloader({ @@ -1205,6 +1803,9 @@ if __name__ == '__main__': 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and 
u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), @@ -1212,11 +1813,31 @@ if __name__ == '__main__': 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) + fd.add_info_extractor(google_ie) + fd.add_info_extractor(photobucket_ie) + fd.add_info_extractor(yahoo_ie) + + # This must come last since it's the + # fallback if none of the others work + fd.add_info_extractor(generic_ie) + + # Update version + if opts.update_self: + update_self(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() retcode = fd.download(all_urls) sys.exit(retcode) From f8b5c2052c46279f02e940ea1d1a0a2948c19e84 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 29 Apr 2010 09:27:05 +0000 Subject: [PATCH 012/279] update to new release to fix download issues for some videos --- Home | 208 ++++++++------ index.html | 656 +++++++++++++++++++++++++++++++++----------- youtube-dl | 791 +++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 1319 insertions(+), 336 deletions(-) diff --git a/Home b/Home index 25f453b..1be19d9 100644 --- a/Home +++ b/Home @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
  • .*?Submitter:.*?(.*?)<', webpage) + mobj = re.search(r'(?ms)By:\s*(.+?)<', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -882,6 +1008,396 @@ class MetacafeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class GoogleIE(InfoExtractor): + """Information extractor for video.google.com.""" + + _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(GoogleIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'mp4' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = 
re.search(r"download_url:'([^']+)'", webpage) + if mobj is None: + video_extension = 'flv' + mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = mediaURL.replace('\\x3d', '\x3d') + mediaURL = mediaURL.replace('\\x26', '\x26') + + video_url = mediaURL + + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class PhotobucketIE(InfoExtractor): + """Information extractor for photobucket.com.""" + + _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(PhotobucketIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: 
%s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + video_url = mediaURL + + mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + video_uploader = mobj.group(2).decode('utf-8') + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class YahooIE(InfoExtractor): + """Information extractor for video.yahoo.com.""" + + # _VALID_URL matches all Yahoo! Video URLs + # _VPAGE_URL matches only the extractable '/watch/' URLs + _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' + _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
+ + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(YahooIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(2) + video_extension = 'flv' + + # Rewrite valid but non-extractable URLs as + # extractable English language /watch/ URLs + if re.match(self._VPAGE_URL, url) is None: + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract id field') + return + yahoo_id = mobj.group(1) + + mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract vid field') + return + yahoo_vid = mobj.group(1) + + url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) + return self._real_extract(url) + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # 
Extract uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + mobj = re.search(r'

    (.*)

    ', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video height and width + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video height') + return + yv_video_height = mobj.group(1) + + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video width') + return + yv_video_width = mobj.group(1) + + # Retrieve video playlist to extract media URL + # I'm not completely sure what all these options are, but we + # seem to need most of them, otherwise the server sends a 401. + yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents + yv_bitrate = '700' # according to Wikipedia this is hard-coded + request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract media URL from playlist XML + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + # video uploader is domain name + mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_uploader = mobj.group(1).decode('utf-8') + + try: + # 
Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -906,6 +1422,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): @@ -919,6 +1436,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -979,10 +1497,10 @@ class YoutubeSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' - _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -1028,7 +1546,7 @@ class YoutubePlaylistIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: + if re.search(self._MORE_PAGES_INDICATOR, page) is None: break pagenum = pagenum + 1 @@ -1036,6 +1554,61 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
return +class YoutubeUserIE(InfoExtractor): + """Information Extractor for YouTube users.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' + _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' + _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeUserIE._VALID_URL, url) is not None) + + def report_download_page(self, username): + """Report attempt to download user page.""" + self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract username + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid url: %s' % url) + return + + # Download user page + username = mobj.group(1) + video_ids = [] + pagenum = 1 + + self.report_download_page(username) + request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) + + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + class PostProcessor(object): """Post Processor class. 
@@ -1089,6 +1662,22 @@ if __name__ == '__main__': import getpass import optparse + # Function to update the program file with the latest version from bitbucket.org + def update_self(downloader, filename): + # Note: downloader only used for options + if not os.access (filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_stdout('Updating to latest stable version...') + latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + downloader.to_stdout('Updated to version %s' % latest_version) + # General configuration urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) @@ -1097,7 +1686,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.09.13', + version='2010.04.04', conflict_handler='resolve', ) @@ -1105,6 +1694,8 @@ if __name__ == '__main__': action='help', help='print this help text and exit') parser.add_option('-v', '--version', action='version', help='print program version and exit') + parser.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest stable version') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', @@ -1128,6 +1719,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('-d', '--high-def', action='store_const', dest='format', help='alias for -f 22', const='22') + video_format.add_option('--all-formats', 
+ action='store_const', dest='format', help='download all available video formats', const='-1') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -1139,6 +1732,8 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -1157,7 +1752,7 @@ if __name__ == '__main__': parser.add_option_group(filesystem) (opts, args) = parser.parse_args() - + # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -1170,8 +1765,6 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options - if len(all_urls) < 1: - parser.error(u'you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -1192,7 +1785,12 @@ if __name__ == '__main__': youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) + google_ie = GoogleIE() + photobucket_ie = PhotobucketIE() + yahoo_ie = YahooIE() + generic_ie = GenericIE() # File downloader fd = FileDownloader({ @@ -1205,6 +1803,9 @@ if __name__ == '__main__': 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and 
u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), @@ -1212,11 +1813,31 @@ if __name__ == '__main__': 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) + fd.add_info_extractor(google_ie) + fd.add_info_extractor(photobucket_ie) + fd.add_info_extractor(yahoo_ie) + + # This must come last since it's the + # fallback if none of the others work + fd.add_info_extractor(generic_ie) + + # Update version + if opts.update_self: + update_self(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() retcode = fd.download(all_urls) sys.exit(retcode) From 0f8a23538e9402a2b4fd182b1c836e70380bd536 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 29 Apr 2010 09:27:06 +0000 Subject: [PATCH 013/279] update to new release to fix download issues for some videos --- Home | 208 +++++++------ index.html | 208 +++++++------ youtube-dl | 791 ++++++++++++++++++++++++++++++++++++++++++------ youtube-dl.spec | 7 +- 4 files changed, 945 insertions(+), 269 deletions(-) diff --git a/Home b/Home index 25f453b..1be19d9 100644 --- a/Home +++ b/Home @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
  • .*?Submitter:.*?(.*?)<', webpage) + mobj = re.search(r'(?ms)By:\s*(.+?)<', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -882,6 +1008,396 @@ class MetacafeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class GoogleIE(InfoExtractor): + """Information extractor for video.google.com.""" + + _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(GoogleIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'mp4' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = 
re.search(r"download_url:'([^']+)'", webpage) + if mobj is None: + video_extension = 'flv' + mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = mediaURL.replace('\\x3d', '\x3d') + mediaURL = mediaURL.replace('\\x26', '\x26') + + video_url = mediaURL + + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class PhotobucketIE(InfoExtractor): + """Information extractor for photobucket.com.""" + + _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(PhotobucketIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: 
%s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + video_url = mediaURL + + mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + video_uploader = mobj.group(2).decode('utf-8') + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class YahooIE(InfoExtractor): + """Information extractor for video.yahoo.com.""" + + # _VALID_URL matches all Yahoo! Video URLs + # _VPAGE_URL matches only the extractable '/watch/' URLs + _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' + _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
+ + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(YahooIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(2) + video_extension = 'flv' + + # Rewrite valid but non-extractable URLs as + # extractable English language /watch/ URLs + if re.match(self._VPAGE_URL, url) is None: + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract id field') + return + yahoo_id = mobj.group(1) + + mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract vid field') + return + yahoo_vid = mobj.group(1) + + url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) + return self._real_extract(url) + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # 
Extract uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + mobj = re.search(r'

    (.*)

    ', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video height and width + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video height') + return + yv_video_height = mobj.group(1) + + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video width') + return + yv_video_width = mobj.group(1) + + # Retrieve video playlist to extract media URL + # I'm not completely sure what all these options are, but we + # seem to need most of them, otherwise the server sends a 401. + yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents + yv_bitrate = '700' # according to Wikipedia this is hard-coded + request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract media URL from playlist XML + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + # video uploader is domain name + mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_uploader = mobj.group(1).decode('utf-8') + + try: + # 
Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -906,6 +1422,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): @@ -919,6 +1436,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -979,10 +1497,10 @@ class YoutubeSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' - _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -1028,7 +1546,7 @@ class YoutubePlaylistIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: + if re.search(self._MORE_PAGES_INDICATOR, page) is None: break pagenum = pagenum + 1 @@ -1036,6 +1554,61 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
return +class YoutubeUserIE(InfoExtractor): + """Information Extractor for YouTube users.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' + _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' + _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeUserIE._VALID_URL, url) is not None) + + def report_download_page(self, username): + """Report attempt to download user page.""" + self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract username + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid url: %s' % url) + return + + # Download user page + username = mobj.group(1) + video_ids = [] + pagenum = 1 + + self.report_download_page(username) + request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) + + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + class PostProcessor(object): """Post Processor class. 
@@ -1089,6 +1662,22 @@ if __name__ == '__main__': import getpass import optparse + # Function to update the program file with the latest version from bitbucket.org + def update_self(downloader, filename): + # Note: downloader only used for options + if not os.access (filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_stdout('Updating to latest stable version...') + latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + downloader.to_stdout('Updated to version %s' % latest_version) + # General configuration urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) @@ -1097,7 +1686,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.09.13', + version='2010.04.04', conflict_handler='resolve', ) @@ -1105,6 +1694,8 @@ if __name__ == '__main__': action='help', help='print this help text and exit') parser.add_option('-v', '--version', action='version', help='print program version and exit') + parser.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest stable version') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', @@ -1128,6 +1719,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('-d', '--high-def', action='store_const', dest='format', help='alias for -f 22', const='22') + video_format.add_option('--all-formats', 
+ action='store_const', dest='format', help='download all available video formats', const='-1') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -1139,6 +1732,8 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -1157,7 +1752,7 @@ if __name__ == '__main__': parser.add_option_group(filesystem) (opts, args) = parser.parse_args() - + # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -1170,8 +1765,6 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options - if len(all_urls) < 1: - parser.error(u'you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -1192,7 +1785,12 @@ if __name__ == '__main__': youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) + google_ie = GoogleIE() + photobucket_ie = PhotobucketIE() + yahoo_ie = YahooIE() + generic_ie = GenericIE() # File downloader fd = FileDownloader({ @@ -1205,6 +1803,9 @@ if __name__ == '__main__': 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and 
u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), @@ -1212,11 +1813,31 @@ if __name__ == '__main__': 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) + fd.add_info_extractor(google_ie) + fd.add_info_extractor(photobucket_ie) + fd.add_info_extractor(yahoo_ie) + + # This must come last since it's the + # fallback if none of the others work + fd.add_info_extractor(generic_ie) + + # Update version + if opts.update_self: + update_self(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() retcode = fd.download(all_urls) sys.exit(retcode) diff --git a/youtube-dl.spec b/youtube-dl.spec index 2dbfd7c..d12bd97 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl -Version: 2009.09.13 -Release: 2%{?dist} +Version: 2010.04.04 +Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Apr 29 2010 Till Maas - 2010.04.04-1 +- Update to latest release to fix some download issues RH #582372 + * Fri Oct 09 2009 Rafał Psota - 2009.09.13-2 - Small fix in %%prep From 53c56dbf10245797de8eb2e22b5b8e85640c0139 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 29 Apr 2010 09:34:28 +0000 Subject: [PATCH 014/279] update spec 
like it was intended --- youtube-dl.spec | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 2dbfd7c..d12bd97 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl -Version: 2009.09.13 -Release: 2%{?dist} +Version: 2010.04.04 +Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Apr 29 2010 Till Maas - 2010.04.04-1 +- Update to latest release to fix some download issues RH #582372 + * Fri Oct 09 2009 Rafał Psota - 2009.09.13-2 - Small fix in %%prep From 10b697701b472ddd961b6519dd627e03c6283729 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 29 Apr 2010 09:34:28 +0000 Subject: [PATCH 015/279] update spec like it was intended --- youtube-dl.spec | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 2dbfd7c..d12bd97 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl -Version: 2009.09.13 -Release: 2%{?dist} +Version: 2010.04.04 +Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Apr 29 2010 Till Maas - 2010.04.04-1 +- Update to latest release to fix some download issues RH #582372 + * Fri Oct 09 2009 Rafał Psota - 2009.09.13-2 - Small fix in %%prep From 1c46e3df5ef5e9ddf02a38659004106e55c596da Mon Sep 17 00:00:00 2001 From: Till Maas Date: Mon, 7 Jun 2010 11:52:01 +0000 Subject: [PATCH 016/279] - Update to latest release --- Home | 46 +++--- youtube-dl | 410 +++++++++++++++++++++++++++++++++++++++++++----- youtube-dl.spec | 5 +- 3 files changed, 397 insertions(+), 64 deletions(-) 
diff --git a/Home b/Home index 1be19d9..24651a9 100644 --- a/Home +++ b/Home @@ -207,7 +207,7 @@
  • - Issues (11) » + Issues (10) » @@ -265,9 +265,9 @@ @@ -283,7 +283,7 @@ @@ -293,7 +293,9 @@
      -
    • tip
    • +
    • tip
    • + +
    • 2010.06.06
    • 2010.04.04
    • @@ -335,8 +337,6 @@
    • 2009.01.31
    • -
    • 2008.11.01
    • -
    • 2008.10.16
    • 2008.09.20
    • @@ -378,9 +378,9 @@ @@ -401,7 +401,7 @@

      youtube-dl is a small command-line program for downloading videos from YouTube.com.

      -
      Clone this repository (size: 255.5 KB): HTTPS / SSH
      +
      Clone this repository (size: 269.0 KB): HTTPS / SSH
      $ hg clone http://bitbucket.org/rg3/youtube-dl
      @@ -443,7 +443,7 @@

      youtube-dl: Download videos from YouTube.com

      (and more...)

      What is it?

      -

      youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

      +

      youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

      I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

      Thanks for all the feedback received so far. I'm glad people find my program useful.

      Usage instructions

      @@ -477,10 +477,10 @@

    Download it

    Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

    -

    2010.04.04

    -
    • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
    • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
    • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 +

      2010.06.06

      +
      • MD5: a995ba360c5b0fbd2a22c48306367cc5 +
      • SHA1: 8f077dee718d5057ed4fe7a2173bbded0fb941f6 +
      • SHA256: 690f5b91e536f426de0679d2c69d1e0c37904d21a47fbabb89d374ef9504b096

      Output template

      The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:

      @@ -504,6 +504,12 @@ +
      + + This revision is from 2010-06-06 18:15 + +
      + @@ -519,7 +525,7 @@ Piston 0.2.3rc1 / Hg 1.3.1 / Python 2.5.2 / - r2890| fe01 + r2988| fe01 diff --git a/youtube-dl b/youtube-dl index 43566b6..01a61ba 100644 --- a/youtube-dl +++ b/youtube-dl @@ -193,6 +193,7 @@ class FileDownloader(object): ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 503 continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. """ @@ -364,6 +365,10 @@ class FileDownloader(object): """Report attemtp to resume at given byte.""" self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + def report_retry(self, count, retries): + """Report retry in case of HTTP error 503""" + self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries)) + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: @@ -398,6 +403,10 @@ class FileDownloader(object): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') if self.params.get('forceurl', False): print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcedescription', False) and 'description' in info_dict: + print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') return @@ -419,7 +428,7 @@ class FileDownloader(object): return try: - success = self._do_download(filename, info_dict['url'].encode('utf-8')) + success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -471,7 +480,7 @@ class FileDownloader(object): if 
info is None: break - def _download_with_rtmpdump(self, filename, url): + def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) # Check for rtmpdump first @@ -484,12 +493,16 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename] + basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: - self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True) - time.sleep(2.0) # This seems to be needed + prevsize = os.path.getsize(filename) + self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) + time.sleep(5.0) # This seems to be needed retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) + cursize = os.path.getsize(filename) + if prevsize == cursize and retval == 1: + break if retval == 0: self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) return True @@ -497,10 +510,10 @@ class FileDownloader(object): self.trouble('\nERROR: rtmpdump exited with code %d' % retval) return False - def _do_download(self, filename, url): + def _do_download(self, filename, url, player_url): # Attempt to download using rtmpdump if url.startswith('rtmp'): - return self._download_with_rtmpdump(filename, url) + return self._download_with_rtmpdump(filename, url, player_url) stream = None open_mode = 'wb' @@ -519,24 +532,35 @@ class FileDownloader(object): request.add_header('Range','bytes=%d-' % resume_len) open_mode = 'ab' - # Establish connection - try: - data = urllib2.urlopen(request) - except (urllib2.HTTPError, ), err: - if err.code != 416: # 416 is 'Requested range not 
satisfiable' - raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + count = 0 + retries = self.params.get('retries', 0) + while True: + # Establish connection + try: + data = urllib2.urlopen(request) + break + except (urllib2.HTTPError, ), err: + if err.code == 503: + # Retry in case of HTTP error 503 + count += 1 + if count <= retries: + self.report_retry(count, retries) + continue + if err.code != 416: # 416 is 'Requested range not satisfiable' + raise + # Unable to resume + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + + if content_length is not None and long(content_length) == resume_len: + # Because the file had already been fully downloaded + self.report_file_already_downloaded(filename) + self._num_downloads += 1 + return True + else: + # Because the server didn't let us + self.report_unable_to_resume() + open_mode = 'wb' data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -562,7 +586,10 @@ class FileDownloader(object): except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False - stream.write(data_block) + try: + stream.write(data_block) + except (IOError, OSError), err: + self.trouble('\nERROR: unable to write data: %s' % str(err)) block_size = self.best_block_size(after - before, data_block_len) # Progress message @@ -598,6 +625,15 @@ class InfoExtractor(object): stitle: Simplified title. ext: Video filename extension. format: Video format. + player_url: SWF Player URL (may be None). + + The following fields are optional. 
Their primary purpose is to allow + youtube-dl to serve as the backend for a video search function, such + as the one in youtube2mp3. They are only used when their respective + forced printing functions are called: + + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. Subclasses of this one should re-define the _real_initialize() and _real_extract() methods, as well as the suitable() static method. @@ -656,6 +692,8 @@ class YoutubeIE(InfoExtractor): '18': 'mp4', '22': 'mp4', '37': 'mp4', + '43': 'webm', + '45': 'webm', } @staticmethod @@ -674,6 +712,10 @@ class YoutubeIE(InfoExtractor): """Report attempt to confirm age.""" self._downloader.to_stdout(u'[youtube] Confirming age') + def report_video_webpage_download(self, video_id): + """Report attempt to download video webpage.""" + self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) @@ -786,10 +828,26 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + # Get video info self.report_video_info_webpage_download(video_id) - for el_type in ['embedded', 'detailpage', 'vevo']: - video_info_url = 
('http://www.youtube.com/get_video_info?&video_id=%s&el=%s&ps=default&eurl=&gl=US&hl=en' + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type)) request = urllib2.Request(video_info_url, None, std_headers) try: @@ -842,6 +900,20 @@ class YoutubeIE(InfoExtractor): simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) simple_title = simple_title.strip(ur'_') + # thumbnail image + if 'thumbnail_url' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # description + video_description = 'No description available.' + if self._downloader.params.get('forcedescription', False): + mobj = re.search(r'', video_webpage) + if mobj is not None: + video_description = mobj.group(1) + try: # Process video information self._downloader.process_info({ @@ -852,34 +924,32 @@ class YoutubeIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description.decode('utf-8'), + 'player_url': player_url, }) if all_formats: - if quality_index == len(self._available_formats) - 1: + quality_index += 1 + if quality_index == len(self._available_formats): # None left to get return else: - quality_index += 1 format_param = self._available_formats[quality_index] - if format_param == None: - return continue - return except UnavailableFormatError, err: if best_quality or all_formats: - if quality_index == len(self._available_formats) - 1: + quality_index += 1 + if quality_index == len(self._available_formats): # I don't ever expect this to happen if not all_formats: 
self._downloader.trouble(u'ERROR: no known formats available for video') return else: self.report_unavailable_format(video_id, format_param) - quality_index += 1 format_param = self._available_formats[quality_index] - if format_param == None: - return continue else: self._downloader.trouble('ERROR: format not available for video') @@ -1009,6 +1079,7 @@ class MetacafeIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1080,6 +1151,32 @@ class GoogleIE(InfoExtractor): video_title = sanitize_title(video_title) simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + # Extract video description + mobj = re.search(r'([^<]*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video description') + return + video_description = mobj.group(1).decode('utf-8') + if not video_description: + video_description = 'No description available.' 
+ + # Extract video thumbnail + if self._downloader.params.get('forcethumbnail', False): + request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id))) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1) + else: # we need something to pass to process_info + video_thumbnail = '' + + try: # Process video information self._downloader.process_info({ @@ -1090,6 +1187,7 @@ class GoogleIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1168,6 +1266,7 @@ class PhotobucketIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1258,6 +1357,21 @@ class YahooIE(InfoExtractor): return video_uploader = mobj.group(1).decode('utf-8') + # Extract video thumbnail + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + # Extract video description + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video description') + return + video_description = mobj.group(1).decode('utf-8') + if not video_description: video_description = 'No description available.' 
+ # Extract video height and width mobj = re.search(r'', webpage) if mobj is None: @@ -1303,6 +1417,11 @@ class YahooIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1398,6 +1517,7 @@ class GenericIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1494,6 +1614,188 @@ class YoutubeSearchIE(InfoExtractor): pagenum = pagenum + 1 +class GoogleSearchIE(InfoExtractor): + """Information Extractor for Google Video search queries.""" + _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' + _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' + _MORE_PAGES_INDICATOR = r'Next' + _google_ie = None + _max_google_results = 1000 + + def __init__(self, google_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._google_ie = google_ie + + @staticmethod + def suitable(url): + return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) + self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) + + def _real_initialize(self): + self._google_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + return + + prefix, query = query.split(':') + 
prefix = prefix[8:] + query = query.encode('utf-8') + if prefix == '': + self._download_n_results(query, 1) + return + elif prefix == 'all': + self._download_n_results(query, self._max_google_results) + return + else: + try: + n = long(prefix) + if n <= 0: + self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return + elif n > self._max_google_results: + self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) + n = self._max_google_results + self._download_n_results(query, n) + return + except ValueError: # parsing prefix as integer fails + self._download_n_results(query, 1) + return + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + video_id = mobj.group(1) + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + for id in video_ids: + self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) + return + + if re.search(self._MORE_PAGES_INDICATOR, page) is None: + for id in video_ids: + self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) + return + + pagenum = pagenum + 1 + +class YahooSearchIE(InfoExtractor): + """Information Extractor for Yahoo! 
Video search queries.""" + _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' + _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"' + _MORE_PAGES_INDICATOR = r'\s*Next' + _yahoo_ie = None + _max_yahoo_results = 1000 + + def __init__(self, yahoo_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._yahoo_ie = yahoo_ie + + @staticmethod + def suitable(url): + return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) + self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) + + def _real_initialize(self): + self._yahoo_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + return + + prefix, query = query.split(':') + prefix = prefix[8:] + query = query.encode('utf-8') + if prefix == '': + self._download_n_results(query, 1) + return + elif prefix == 'all': + self._download_n_results(query, self._max_yahoo_results) + return + else: + try: + n = long(prefix) + if n <= 0: + self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return + elif n > self._max_yahoo_results: + self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) + n = self._max_yahoo_results + self._download_n_results(query, n) + return + except ValueError: # parsing prefix as integer fails + self._download_n_results(query, 1) + return + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = 
self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + video_id = mobj.group(1) + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + for id in video_ids: + self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) + return + + if re.search(self._MORE_PAGES_INDICATOR, page) is None: + for id in video_ids: + self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) + return + + pagenum = pagenum + 1 + class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" @@ -1686,7 +1988,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.04.04', + version='2010.06.06', conflict_handler='resolve', ) @@ -1700,6 +2002,8 @@ if __name__ == '__main__': action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', dest='ratelimit', metavar='L', help='download rate limit (e.g. 
50k or 44.6m)') + parser.add_option('-R', '--retries', + dest='retries', metavar='T', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -1732,6 +2036,10 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) parser.add_option_group(verbosity) @@ -1744,7 +2052,7 @@ if __name__ == '__main__': filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TPL', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download') + dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -1752,12 +2060,16 @@ if __name__ == '__main__': parser.add_option_group(filesystem) (opts, args) = parser.parse_args() - + # Batch file verification batchurls = [] if opts.batchfile is not None: try: - batchurls = open(opts.batchfile, 'r').readlines() + if opts.batchfile == '-': + batchfd = sys.stdin + else: + batchfd = open(opts.batchfile, 'r') + batchurls = batchfd.readlines() batchurls = [x.strip() for x in batchurls] batchurls = [x for x in 
batchurls if len(x) > 0] except IOError: @@ -1780,6 +2092,11 @@ if __name__ == '__main__': if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.retries is not None: + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') # Information extractors youtube_ie = YoutubeIE() @@ -1788,8 +2105,10 @@ if __name__ == '__main__': youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) google_ie = GoogleIE() + google_search_ie = GoogleSearchIE(google_ie) photobucket_ie = PhotobucketIE() yahoo_ie = YahooIE() + yahoo_search_ie = YahooSearchIE(yahoo_ie) generic_ie = GenericIE() # File downloader @@ -1797,10 +2116,12 @@ if __name__ == '__main__': 'usenetrc': opts.usenetrc, 'username': opts.username, 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle), + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle), + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') @@ -1812,6 +2133,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, + 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, }) @@ -1821,8 +2143,10 @@ if __name__ == '__main__': fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) + fd.add_info_extractor(google_search_ie) 
fd.add_info_extractor(photobucket_ie) fd.add_info_extractor(yahoo_ie) + fd.add_info_extractor(yahoo_search_ie) # This must come last since it's the # fallback if none of the others work diff --git a/youtube-dl.spec b/youtube-dl.spec index d12bd97..53607b5 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.04.04 +Version: 2010.06.06 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Mon Jun 07 2010 Till Maas - 2010.06.06-1 +- Update to latest release + * Thu Apr 29 2010 Till Maas - 2010.04.04-1 - Update to latest release to fix some download issues RH #582372 From f84a8cf37ea39292ece3f99ca8c6e0a962117407 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Mon, 7 Jun 2010 12:48:17 +0000 Subject: [PATCH 017/279] - Update to latest release --- Home | 46 +++--- index.html | 208 +++++++++++++----------- youtube-dl | 410 +++++++++++++++++++++++++++++++++++++++++++----- youtube-dl.spec | 5 +- 4 files changed, 514 insertions(+), 155 deletions(-) diff --git a/Home b/Home index 1be19d9..24651a9 100644 --- a/Home +++ b/Home @@ -207,7 +207,7 @@
    • - Issues (11) » + Issues (10) » @@ -265,9 +265,9 @@ @@ -283,7 +283,7 @@ @@ -293,7 +293,9 @@
        -
      • tip
      • +
      • tip
      • + +
      • 2010.06.06
      • 2010.04.04
      • @@ -335,8 +337,6 @@
      • 2009.01.31
      • -
      • 2008.11.01
      • -
      • 2008.10.16
      • 2008.09.20
      • @@ -378,9 +378,9 @@ @@ -401,7 +401,7 @@

        youtube-dl is a small command-line program for downloading videos from YouTube.com.

        -
        Clone this repository (size: 255.5 KB): HTTPS / SSH
        +
        Clone this repository (size: 269.0 KB): HTTPS / SSH
        $ hg clone http://bitbucket.org/rg3/youtube-dl
        @@ -443,7 +443,7 @@

        youtube-dl: Download videos from YouTube.com

        (and more...)

        What is it?

        -

        youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

        +

        youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

        I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

        Thanks for all the feedback received so far. I'm glad people find my program useful.

        Usage instructions

        @@ -477,10 +477,10 @@

      Download it

      Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

      -

      2010.04.04

      -
      • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
      • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
      • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 +

        2010.06.06

        +
        • MD5: a995ba360c5b0fbd2a22c48306367cc5 +
        • SHA1: 8f077dee718d5057ed4fe7a2173bbded0fb941f6 +
        • SHA256: 690f5b91e536f426de0679d2c69d1e0c37904d21a47fbabb89d374ef9504b096

        Output template

        The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:

        @@ -504,6 +504,12 @@ +
        + + This revision is from 2010-06-06 18:15 + +
        + @@ -519,7 +525,7 @@ Piston 0.2.3rc1 / Hg 1.3.1 / Python 2.5.2 / - r2890| fe01 + r2988| fe01 diff --git a/index.html b/index.html index 25f453b..1be19d9 100644 --- a/index.html +++ b/index.html @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
      • - Issues (11) » + Issues (10) » @@ -265,9 +265,9 @@ @@ -283,7 +283,7 @@ @@ -293,7 +293,9 @@
          -
        • tip
        • +
        • tip
        • + +
        • 2010.06.06
        • 2010.04.04
        • @@ -335,8 +337,6 @@
        • 2009.01.31
        • -
        • 2008.11.01
        • -
        • 2008.10.16
        • 2008.09.20
        • @@ -378,9 +378,9 @@ @@ -401,7 +401,7 @@

          youtube-dl is a small command-line program for downloading videos from YouTube.com.

          -
          Clone this repository (size: 255.5 KB): HTTPS / SSH
          +
          Clone this repository (size: 269.0 KB): HTTPS / SSH
          $ hg clone http://bitbucket.org/rg3/youtube-dl
          @@ -443,7 +443,7 @@

          youtube-dl: Download videos from YouTube.com

          (and more...)

          What is it?

          -

          youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          +

          youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

          Thanks for all the feedback received so far. I'm glad people find my program useful.

          Usage instructions

          @@ -477,10 +477,10 @@

        Download it

        Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

        -

        2010.04.04

        -
        • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
        • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
        • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 +

          2010.06.06

          +
          • MD5: a995ba360c5b0fbd2a22c48306367cc5 +
          • SHA1: 8f077dee718d5057ed4fe7a2173bbded0fb941f6 +
          • SHA256: 690f5b91e536f426de0679d2c69d1e0c37904d21a47fbabb89d374ef9504b096

          Output template

          The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:

          @@ -504,6 +504,12 @@ +
          + + This revision is from 2010-06-06 18:15 + +
          + @@ -519,7 +525,7 @@ Piston 0.2.3rc1 / Hg 1.3.1 / Python 2.5.2 / - r2890| fe01 + r2988| fe01 diff --git a/index.html b/index.html index 25f453b..1be19d9 100644 --- a/index.html +++ b/index.html @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
        • - Issues (10) » + Issues (12) » @@ -265,9 +235,9 @@ @@ -283,7 +253,7 @@ @@ -293,7 +263,9 @@ @@ -378,9 +348,9 @@ @@ -401,7 +371,7 @@

          youtube-dl is a small command-line program for downloading videos from YouTube.com.

          -
          Clone this repository (size: 269.0 KB): HTTPS / SSH
          +
          Clone this repository (size: 287.0 KB): HTTPS / SSH
          $ hg clone http://bitbucket.org/rg3/youtube-dl
          @@ -443,46 +413,41 @@

          youtube-dl: Download videos from YouTube.com

          (and more...)

          What is it?

          -

          youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          +

          youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

          Thanks for all the feedback received so far. I'm glad people find my program useful.

          Usage instructions

          In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

          In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

          -

          After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

          +

          After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are usually in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

          If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

          More usage tips

          -
          • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
          • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
          • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
          • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
          • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, on systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
          • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
          • In a similar line, the -e or --get-title option tells the program to print the video title. -
          • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
          • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
          • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
          • The -m or --mobile-version option is an alias for -f 17. -
          • The -d or --high-def option is an alias for -f 22. -
          • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
          • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
          • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
          • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
          • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
          • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
          • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
          • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
          • For usage instructions, use youtube-dl -h or youtube-dl --help. -
          • You can cancel the program at any time pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. +

            The program is usually invoked as youtube-dl followed by options and the video URLs. Listing all the options here would make this text too long, so you can run youtube-dl --help and get a summary of them. From that point on you can start experimenting with the different options yourself. The most common ones are -t (or -l) to include the video title in the file name, and either -f or -b to download a high quality version of the video. Also, the -o option can specify the output file name and path. It allows special character sequences that can be used as templates to be replaced. See the "Output template" section for more details.

            +

            Supported sites

            +
            • YouTube.com. +
            • YouTube.com playlists (playlist URLs in "view_play_list" form). +
            • YouTube.com searches, using the special keyword "ytsearch" as a form of URL, as in "ytsearch:cute kittens". Do not forget the quotes if you want to include spaces in your search. Other variants are "ytsearchN" to download more than the first result, with N being a number, and "ytsearchall". +
            • metacafe.com. +
            • Google Video. +
            • Google Video searches ("gvsearch" keyword). +
            • Photobucket videos. +
            • Yahoo! video. +
            • Yahoo! video searches ("ybsearch" keyword). +
            • Dailymotion. +
            • A generic downloader that works on some sites.

            Download it

            Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

            -

            2010.06.06

            -
            • MD5: a995ba360c5b0fbd2a22c48306367cc5 -
            • SHA1: 8f077dee718d5057ed4fe7a2173bbded0fb941f6 -
            • SHA256: 690f5b91e536f426de0679d2c69d1e0c37904d21a47fbabb89d374ef9504b096 +

              2010.07.14

              +
              • MD5: 7808ec7e428d1b3c6a697fb63a51d401 +
              • SHA1: d1ee2a5aa27af89ea8b5d2f3c98fdfe2c75a9460 +
              • SHA256: 7c27bedbfb9ae00dcdc148211c5da612ce4229706b5e5df3f2bf1b263c058d9a
              -

              Output template

              +

              Proxy support

              +

              youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

              +

              YouTube formats

              +

              Using the -f option and other related options, you can specify the video format to be downloaded from YouTube. Instead of keeping a video format table here, I will refer you to the list of YouTube formats on Wikipedia.

              +

              Output template

              The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, as in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

          + + + diff --git a/youtube-dl b/youtube-dl index 01a61ba..e89c915 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,9 +27,9 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', } @@ -99,7 +99,7 @@ def sanitize_open(filename, open_mode): return (stream, filename) except (IOError, OSError), err: # In case of error, try to remove win32 forbidden chars - filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename) + filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename) # An exception here should be caught in the caller stream = open(filename, open_mode) @@ -189,6 +189,7 @@ class FileDownloader(object): forcetitle: Force printing title. simulate: Do not download the video files. format: Video format code. + format_limit: Highest quality format to try. outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. 
@@ -386,6 +387,10 @@ class FileDownloader(object): self.to_stdout(u'[download] Download completed') else: self.to_stdout(u'') + + def increment_downloads(self): + """Increment the ordinal that assigns a number to each file.""" + self._num_downloads += 1 def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -555,7 +560,6 @@ class FileDownloader(object): if content_length is not None and long(content_length) == resume_len: # Because the file had already been fully downloaded self.report_file_already_downloaded(filename) - self._num_downloads += 1 return True else: # Because the server didn't let us @@ -582,7 +586,6 @@ class FileDownloader(object): try: (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) - self._num_downloads += 1 except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False @@ -680,18 +683,20 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag + # Listed in order of priority for the -b option + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] _video_extensions = { '13': '3gp', '17': 'mp4', '18': 'mp4', '22': 'mp4', '37': 'mp4', + '38': 'video', # You actually 
don't know if this will be MOV, AVI or whatever '43': 'webm', '45': 'webm', } @@ -807,6 +812,10 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(2) # Downloader parameters @@ -818,6 +827,13 @@ class YoutubeIE(InfoExtractor): params = self._downloader.params format_param = params.get('format', None) if format_param == '0': + format_limit = params.get('format_limit', None) + if format_limit is not None: + try: + # Start at a different format if the user has limited the maximum quality + quality_index = self._available_formats.index(format_limit) + except ValueError: + pass format_param = self._available_formats[quality_index] best_quality = True elif format_param == '-1': @@ -838,7 +854,7 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: player_url = mobj.group(1) else: @@ -1026,6 +1042,10 @@ class MetacafeIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1085,6 +1105,94 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class DailymotionIE(InfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DailymotionIE._VALID_URL, url) is 
not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + video_id = mobj.group(1) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + # if needed add http://www.dailymotion.com/ if relative URL + + video_url = mediaURL + + # '' + mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + + mobj = re.search(r'(?im)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process 
video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1115,6 +1223,9 @@ class GoogleIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1223,6 +1334,9 @@ class PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1298,13 +1412,16 @@ class YahooIE(InfoExtractor): def _real_initialize(self): return - def _real_extract(self, url): + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None and new_video: + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1331,7 +1448,7 @@ class YahooIE(InfoExtractor): yahoo_vid = mobj.group(1) url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) - return self._real_extract(url) + return self._real_extract(url, new_video=False) # Retrieve video webpage to extract further information request = urllib2.Request(url) @@ -1450,6 +1567,10 @@ class GenericIE(InfoExtractor): return def _real_extract(self, url): + # At this point we have a new video + if 
self._downloader is not None: + self._downloader.increment_downloads() + video_id = url.split('/')[-1] request = urllib2.Request(url) try: @@ -2001,22 +2122,22 @@ if __name__ == '__main__': parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-R', '--retries', - dest='retries', metavar='T', help='number of retries (default is 10)', default=10) + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', - dest='username', metavar='UN', help='account username') + dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', - dest='password', metavar='PW', help='account password') + dest='password', metavar='PASSWORD', help='account password') authentication.add_option('-n', '--netrc', action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) parser.add_option_group(authentication) video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FMT', help='video format code') + action='store', dest='format', metavar='FORMAT', help='video format code') video_format.add_option('-b', '--best-quality', action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', @@ -2025,6 +2146,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download 
all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2050,9 +2173,9 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TPL', help='output filename template') + dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -2101,6 +2224,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() youtube_pl_ie = YoutubePlaylistIE(youtube_ie) youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) @@ -2123,6 +2247,7 @@ if __name__ == '__main__': 'forcedescription': opts.getdescription, 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'format': opts.format, + 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') @@ -2141,6 +2266,7 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) 
fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) fd.add_info_extractor(google_search_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index 53607b5..fd32319 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.06.06 +Version: 2010.07.14 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Jul 15 2010 Till Maas - 2010.07.14-1 +- Update to latest release + * Mon Jun 07 2010 Till Maas - 2010.06.06-1 - Update to latest release From efe36ab29de46bbf199a040f003ec4beb4ec732f Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 15 Jul 2010 18:01:08 +0000 Subject: [PATCH 020/279] - Update to latest release --- Home | 139 ++++++++++++++++++----------------------- youtube-dl | 160 +++++++++++++++++++++++++++++++++++++++++++----- youtube-dl.spec | 5 +- 3 files changed, 207 insertions(+), 97 deletions(-) diff --git a/Home b/Home index 24651a9..cf1d5c0 100644 --- a/Home +++ b/Home @@ -9,12 +9,12 @@ - - + +
          @@ -86,29 +72,13 @@
          -
          -
          -
          - - - -
          -
          -
          -
          - - -
          -
          -
          -
          @@ -207,7 +177,7 @@
        • - Issues (10) » + Issues (12) » @@ -265,9 +235,9 @@ @@ -283,7 +253,7 @@ @@ -293,7 +263,9 @@ @@ -378,9 +348,9 @@ @@ -401,7 +371,7 @@

          youtube-dl is a small command-line program for downloading videos from YouTube.com.

          -
          Clone this repository (size: 269.0 KB): HTTPS / SSH
          +
          Clone this repository (size: 287.0 KB): HTTPS / SSH
          $ hg clone http://bitbucket.org/rg3/youtube-dl
          @@ -443,46 +413,41 @@

          youtube-dl: Download videos from YouTube.com

          (and more...)

          What is it?

          -

          youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          +

          youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

          Thanks for all the feedback received so far. I'm glad people find my program useful.

          Usage instructions

          In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

          In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

          -

          After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

          +

          After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are usually in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

          If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

          More usage tips

          -
          • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
          • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
          • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
          • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
          • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, on systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
          • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
          • In a similar line, the -e or --get-title option tells the program to print the video title. -
          • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
          • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
          • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
          • The -m or --mobile-version option is an alias for -f 17. -
          • The -d or --high-def option is an alias for -f 22. -
          • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
          • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
          • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
          • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
          • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
          • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
          • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
          • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
          • For usage instructions, use youtube-dl -h or youtube-dl --help. -
          • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. +

            The program is usually invoked as youtube-dl followed by options and the video URLs. Listing all the options here would make this text too long, so you can run youtube-dl --help and get a summary of them. From that point on you can start experimenting with the different options yourself. The most common ones are -t (or -l) to include the video title in the file name, and either -f or -b to download a high quality version of the video. Also, the -o option can specify the output file name and path. It allows special character sequences that can be used as templates to be replaced. See the "Output template" section for more details.

            +

            Supported sites

            +
            • YouTube.com. +
            • YouTube.com playlists (playlist URLs in "view_play_list" form). +
            • YouTube.com searches, using the special keyword "ytsearch" as a form of URL, as in "ytsearch:cute kittens". Do not forget the quotes if you want to include spaces in your search. Other variants are "ytsearchN" to download more than the first result, with N being a number, and "ytsearchall". +
            • metacafe.com. +
            • Google Video. +
            • Google Video searches ("gvsearch" keyword). +
            • Photobucket videos. +
            • Yahoo! video. +
            • Yahoo! video searches ("ybsearch" keyword). +
            • Dailymotion. +
            • A generic downloader that works on some sites.

            Download it

            Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on one and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

            -

            2010.06.06

            -
            • MD5: a995ba360c5b0fbd2a22c48306367cc5 -
            • SHA1: 8f077dee718d5057ed4fe7a2173bbded0fb941f6 -
            • SHA256: 690f5b91e536f426de0679d2c69d1e0c37904d21a47fbabb89d374ef9504b096 +

              2010.07.14

              +
              • MD5: 7808ec7e428d1b3c6a697fb63a51d401 +
              • SHA1: d1ee2a5aa27af89ea8b5d2f3c98fdfe2c75a9460 +
              • SHA256: 7c27bedbfb9ae00dcdc148211c5da612ce4229706b5e5df3f2bf1b263c058d9a
              -

              Output template

              +

              Proxy support

              +

              youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

              +

              YouTube formats

              +

              Using the -f option and other related options, you can specify the video format to be downloaded from YouTube. Instead of keeping a video format table here, I will refer you to the list of YouTube formats on Wikipedia.

              +

              Output template

              The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

          + + + diff --git a/youtube-dl b/youtube-dl index 01a61ba..e89c915 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,9 +27,9 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', } @@ -99,7 +99,7 @@ def sanitize_open(filename, open_mode): return (stream, filename) except (IOError, OSError), err: # In case of error, try to remove win32 forbidden chars - filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename) + filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename) # An exception here should be caught in the caller stream = open(filename, open_mode) @@ -189,6 +189,7 @@ class FileDownloader(object): forcetitle: Force printing title. simulate: Do not download the video files. format: Video format code. + format_limit: Highest quality format to try. outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. 
@@ -386,6 +387,10 @@ class FileDownloader(object): self.to_stdout(u'[download] Download completed') else: self.to_stdout(u'') + + def increment_downloads(self): + """Increment the ordinal that assigns a number to each file.""" + self._num_downloads += 1 def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -555,7 +560,6 @@ class FileDownloader(object): if content_length is not None and long(content_length) == resume_len: # Because the file had already been fully downloaded self.report_file_already_downloaded(filename) - self._num_downloads += 1 return True else: # Because the server didn't let us @@ -582,7 +586,6 @@ class FileDownloader(object): try: (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) - self._num_downloads += 1 except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False @@ -680,18 +683,20 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag + # Listed in order of priority for the -b option + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] _video_extensions = { '13': '3gp', '17': 'mp4', '18': 'mp4', '22': 'mp4', '37': 'mp4', + '38': 'video', # You actually 
don't know if this will be MOV, AVI or whatever '43': 'webm', '45': 'webm', } @@ -807,6 +812,10 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(2) # Downloader parameters @@ -818,6 +827,13 @@ class YoutubeIE(InfoExtractor): params = self._downloader.params format_param = params.get('format', None) if format_param == '0': + format_limit = params.get('format_limit', None) + if format_limit is not None: + try: + # Start at a different format if the user has limited the maximum quality + quality_index = self._available_formats.index(format_limit) + except ValueError: + pass format_param = self._available_formats[quality_index] best_quality = True elif format_param == '-1': @@ -838,7 +854,7 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: player_url = mobj.group(1) else: @@ -1026,6 +1042,10 @@ class MetacafeIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1085,6 +1105,94 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class DailymotionIE(InfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DailymotionIE._VALID_URL, url) is 
not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + video_id = mobj.group(1) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + # if needed add http://www.dailymotion.com/ if relative URL + + video_url = mediaURL + + # '' + mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + + mobj = re.search(r'(?im)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process 
video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1115,6 +1223,9 @@ class GoogleIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1223,6 +1334,9 @@ class PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1298,13 +1412,16 @@ class YahooIE(InfoExtractor): def _real_initialize(self): return - def _real_extract(self, url): + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None and new_video: + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1331,7 +1448,7 @@ class YahooIE(InfoExtractor): yahoo_vid = mobj.group(1) url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) - return self._real_extract(url) + return self._real_extract(url, new_video=False) # Retrieve video webpage to extract further information request = urllib2.Request(url) @@ -1450,6 +1567,10 @@ class GenericIE(InfoExtractor): return def _real_extract(self, url): + # At this point we have a new video + if 
self._downloader is not None: + self._downloader.increment_downloads() + video_id = url.split('/')[-1] request = urllib2.Request(url) try: @@ -2001,22 +2122,22 @@ if __name__ == '__main__': parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-R', '--retries', - dest='retries', metavar='T', help='number of retries (default is 10)', default=10) + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', - dest='username', metavar='UN', help='account username') + dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', - dest='password', metavar='PW', help='account password') + dest='password', metavar='PASSWORD', help='account password') authentication.add_option('-n', '--netrc', action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) parser.add_option_group(authentication) video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FMT', help='video format code') + action='store', dest='format', metavar='FORMAT', help='video format code') video_format.add_option('-b', '--best-quality', action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', @@ -2025,6 +2146,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download 
all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2050,9 +2173,9 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TPL', help='output filename template') + dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -2101,6 +2224,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() youtube_pl_ie = YoutubePlaylistIE(youtube_ie) youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) @@ -2123,6 +2247,7 @@ if __name__ == '__main__': 'forcedescription': opts.getdescription, 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'format': opts.format, + 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') @@ -2141,6 +2266,7 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) 
fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) fd.add_info_extractor(google_search_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index 53607b5..fd32319 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.06.06 +Version: 2010.07.14 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Jul 15 2010 Till Maas - 2010.07.14-1 +- Update to latest release + * Mon Jun 07 2010 Till Maas - 2010.06.06-1 - Update to latest release From 4229abb7b2e772cb833bc75fcde80b4e2805793e Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 15 Jul 2010 18:01:09 +0000 Subject: [PATCH 021/279] - Update to latest release --- Home | 139 ++++++++++++++++++----------------------- youtube-dl | 160 +++++++++++++++++++++++++++++++++++++++++++----- youtube-dl.spec | 5 +- 3 files changed, 207 insertions(+), 97 deletions(-) diff --git a/Home b/Home index 24651a9..cf1d5c0 100644 --- a/Home +++ b/Home @@ -9,12 +9,12 @@ - - + +
          @@ -86,29 +72,13 @@
          -
          -
          -
          - - - -
          -
          -
          -
          - - -
          -
          -
          -
          @@ -207,7 +177,7 @@
        • - Issues (10) » + Issues (12) » @@ -265,9 +235,9 @@ @@ -283,7 +253,7 @@ @@ -293,7 +263,9 @@ @@ -378,9 +348,9 @@ @@ -401,7 +371,7 @@

          youtube-dl is a small command-line program for downloading videos from YouTube.com.

          -
          Clone this repository (size: 269.0 KB): HTTPS / SSH
          +
          Clone this repository (size: 287.0 KB): HTTPS / SSH
          $ hg clone http://bitbucket.org/rg3/youtube-dl
          @@ -443,46 +413,41 @@

          youtube-dl: Download videos from YouTube.com

          (and more...)

          What is it?

          -

          youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          +

          youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

          I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

          Thanks for all the feedback received so far. I'm glad people find my program useful.

          Usage instructions

          In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

          In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

          -

          After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

          +

          After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are usually in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

          If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

          More usage tips

          -
          • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
          • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
          • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
          • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
          • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
          • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
          • In a similar line, the -e or --get-title option tells the program to print the video title. -
          • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
          • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
          • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
          • The -m or --mobile-version option is an alias for -f 17. -
          • The -d or --high-def option is an alias for -f 22. -
          • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
          • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
          • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
          • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
          • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
          • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
          • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
          • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
          • For usage instructions, use youtube-dl -h or youtube-dl --help. -
          • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. +

            The program is usually invoked as youtube-dl followed by options and the video URLs. Listing all the options here would make this text too long, so you can run youtube-dl --help and get a summary of them. From that point on you can start experimenting with the different options yourself. The most common ones are -t (or -l) to include the video title in the file name, and either -f or -b to download a high quality version of the video. Also, the -o option can specify the output file name and path. It allows special character sequences that can be used as templates to be replaced. See the "Output template" section for more details.

            +

            Supported sites

            +
            • YouTube.com. +
            • YouTube.com playlists (playlist URLs in "view_play_list" form). +
            • YouTube.com searches, using the special keyword "ytsearch" as a form of URL, as in "ytsearch:cute kittens". Do not forget the quotes if you want to include spaces in your search. Other variants are "ytsearchN" to download more than the first result, with N being a number, and "ytsearchall". +
            • metacafe.com. +
            • Google Video. +
            • Google Video searches ("gvsearch" keyword). +
            • Photobucket videos. +
            • Yahoo! video. +
            • Yahoo! video searches ("ybsearch" keyword). +
            • Dailymotion. +
            • A generic downloader that works on some sites.

            Download it

            Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on one and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

            -

            2010.06.06

            -
            • MD5: a995ba360c5b0fbd2a22c48306367cc5 -
            • SHA1: 8f077dee718d5057ed4fe7a2173bbded0fb941f6 -
            • SHA256: 690f5b91e536f426de0679d2c69d1e0c37904d21a47fbabb89d374ef9504b096 +

              2010.07.14

              +
              • MD5: 7808ec7e428d1b3c6a697fb63a51d401 +
              • SHA1: d1ee2a5aa27af89ea8b5d2f3c98fdfe2c75a9460 +
              • SHA256: 7c27bedbfb9ae00dcdc148211c5da612ce4229706b5e5df3f2bf1b263c058d9a
              -

              Output template

              +

              Proxy support

              +

              youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

              +

              YouTube formats

              +

              Using the -f option and other related options, you can specify the video format to be downloaded from YouTube. Instead of keeping a video format table here, I will refer you to the list of YouTube formats on Wikipedia.

              +

              Output template

              The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

          + + + diff --git a/youtube-dl b/youtube-dl index 01a61ba..e89c915 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,9 +27,9 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', } @@ -99,7 +99,7 @@ def sanitize_open(filename, open_mode): return (stream, filename) except (IOError, OSError), err: # In case of error, try to remove win32 forbidden chars - filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename) + filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename) # An exception here should be caught in the caller stream = open(filename, open_mode) @@ -189,6 +189,7 @@ class FileDownloader(object): forcetitle: Force printing title. simulate: Do not download the video files. format: Video format code. + format_limit: Highest quality format to try. outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. 
@@ -386,6 +387,10 @@ class FileDownloader(object): self.to_stdout(u'[download] Download completed') else: self.to_stdout(u'') + + def increment_downloads(self): + """Increment the ordinal that assigns a number to each file.""" + self._num_downloads += 1 def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -555,7 +560,6 @@ class FileDownloader(object): if content_length is not None and long(content_length) == resume_len: # Because the file had already been fully downloaded self.report_file_already_downloaded(filename) - self._num_downloads += 1 return True else: # Because the server didn't let us @@ -582,7 +586,6 @@ class FileDownloader(object): try: (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) - self._num_downloads += 1 except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False @@ -680,18 +683,20 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag + # Listed in order of priority for the -b option + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] _video_extensions = { '13': '3gp', '17': 'mp4', '18': 'mp4', '22': 'mp4', '37': 'mp4', + '38': 'video', # You actually 
don't know if this will be MOV, AVI or whatever '43': 'webm', '45': 'webm', } @@ -807,6 +812,10 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(2) # Downloader parameters @@ -818,6 +827,13 @@ class YoutubeIE(InfoExtractor): params = self._downloader.params format_param = params.get('format', None) if format_param == '0': + format_limit = params.get('format_limit', None) + if format_limit is not None: + try: + # Start at a different format if the user has limited the maximum quality + quality_index = self._available_formats.index(format_limit) + except ValueError: + pass format_param = self._available_formats[quality_index] best_quality = True elif format_param == '-1': @@ -838,7 +854,7 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: player_url = mobj.group(1) else: @@ -1026,6 +1042,10 @@ class MetacafeIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1085,6 +1105,94 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class DailymotionIE(InfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DailymotionIE._VALID_URL, url) is 
not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + video_id = mobj.group(1) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + # if needed add http://www.dailymotion.com/ if relative URL + + video_url = mediaURL + + # '' + mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + + mobj = re.search(r'(?im)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process 
video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1115,6 +1223,9 @@ class GoogleIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1223,6 +1334,9 @@ class PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1298,13 +1412,16 @@ class YahooIE(InfoExtractor): def _real_initialize(self): return - def _real_extract(self, url): + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None and new_video: + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1331,7 +1448,7 @@ class YahooIE(InfoExtractor): yahoo_vid = mobj.group(1) url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) - return self._real_extract(url) + return self._real_extract(url, new_video=False) # Retrieve video webpage to extract further information request = urllib2.Request(url) @@ -1450,6 +1567,10 @@ class GenericIE(InfoExtractor): return def _real_extract(self, url): + # At this point we have a new video + if 
self._downloader is not None: + self._downloader.increment_downloads() + video_id = url.split('/')[-1] request = urllib2.Request(url) try: @@ -2001,22 +2122,22 @@ if __name__ == '__main__': parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-R', '--retries', - dest='retries', metavar='T', help='number of retries (default is 10)', default=10) + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', - dest='username', metavar='UN', help='account username') + dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', - dest='password', metavar='PW', help='account password') + dest='password', metavar='PASSWORD', help='account password') authentication.add_option('-n', '--netrc', action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) parser.add_option_group(authentication) video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FMT', help='video format code') + action='store', dest='format', metavar='FORMAT', help='video format code') video_format.add_option('-b', '--best-quality', action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', @@ -2025,6 +2146,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download 
all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2050,9 +2173,9 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TPL', help='output filename template') + dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -2101,6 +2224,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() youtube_pl_ie = YoutubePlaylistIE(youtube_ie) youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) @@ -2123,6 +2247,7 @@ if __name__ == '__main__': 'forcedescription': opts.getdescription, 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'format': opts.format, + 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') @@ -2141,6 +2266,7 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) 
fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) fd.add_info_extractor(google_search_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index 53607b5..fd32319 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.06.06 +Version: 2010.07.14 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Jul 15 2010 Till Maas - 2010.07.14-1 +- Update to latest release + * Mon Jun 07 2010 Till Maas - 2010.06.06-1 - Update to latest release From 21bf5e24054159d74b918945cfc03106ad551715 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:13:45 +0000 Subject: [PATCH 022/279] - Update to latest release --- Home | 52 ++++----- youtube-dl | 289 ++++++++++++++++++++---------------------------- youtube-dl.spec | 5 +- 3 files changed, 147 insertions(+), 199 deletions(-) diff --git a/Home b/Home index cf1d5c0..aa132b1 100644 --- a/Home +++ b/Home @@ -105,7 +105,7 @@ date.setTime(date.getTime() + (365 * 24 * 60 * 60 * 1000)); var cookieoptions = { path: '/', expires: date }; - window._shard = 'fe01 (ID 1)'; + window._shard = 'bfg02-gunicorn (ID 7)'; $(document).ready(function(){ $('#toggle-repo-content').click(function(){ @@ -177,7 +177,7 @@
        • - Issues (12) » + Issues (17) » @@ -235,9 +235,9 @@ @@ -253,7 +253,7 @@ @@ -263,9 +263,11 @@
            -
          • tip
          • +
          • tip
          • -
          • 2010.07.14
          • +
          • 2010.07.22
          • + +
          • 2010.07.14
          • 2010.06.06
          • @@ -293,8 +295,6 @@
          • 2009.06.29
          • -
          • 2009.05.30
          • -
          • 2009.05.25
          • 2009.05.23
          • @@ -348,9 +348,9 @@ @@ -371,7 +371,7 @@

            youtube-dl is a small command-line program for downloading videos from YouTube.com.

            -
            Clone this repository (size: 287.0 KB): HTTPS / SSH
            +
            Clone this repository (size: 296.3 KB): HTTPS / SSH
            $ hg clone http://bitbucket.org/rg3/youtube-dl
            @@ -413,7 +413,7 @@

            youtube-dl: Download videos from YouTube.com

            (and more...)

            What is it?

            -

            youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

            +

            youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.22. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

            I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

            Thanks for all the feedback received so far. I'm glad people find my program useful.

            Usage instructions

            @@ -438,10 +438,10 @@

          Download it

          Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on a link and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

          -

          2010.07.14

          -
          • MD5: 7808ec7e428d1b3c6a697fb63a51d401 -
          • SHA1: d1ee2a5aa27af89ea8b5d2f3c98fdfe2c75a9460 -
          • SHA256: 7c27bedbfb9ae00dcdc148211c5da612ce4229706b5e5df3f2bf1b263c058d9a +

            2010.07.22

            +
            • MD5: d3a69eb8211e0aa7b61e0afbfe507d41 +
            • SHA1: bafb1b4716ddaae506f10da3394ce7672d5f4e42 +
            • SHA256: 0307ec6b7b58ef26e66afd9e260aba314ce706831a9cbaef3cb65705a66289cd

            Proxy support

            youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

            @@ -469,12 +469,6 @@ -
            - - This revision is from 2010-07-14 18:38 - -
            - @@ -489,8 +483,8 @@ Django 1.2.1 / Piston 0.2.3rc1 / Hg 1.3.1 / - Python 2.5.2 / - r3065| fe01 + Python 2.7.0 / + r3099| bfg02 diff --git a/youtube-dl b/youtube-dl index e89c915..5fd331e 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -131,7 +131,7 @@ class PostProcessingError(Exception): """ pass -class UnavailableFormatError(Exception): +class UnavailableVideoError(Exception): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -401,7 +401,7 @@ class FileDownloader(object): try: self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableFormatError + raise UnavailableVideoError # Forced printings if self.params.get('forcetitle', False): @@ -435,7 +435,7 @@ class FileDownloader(object): try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: - raise UnavailableFormatError + raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return @@ -684,12 +684,12 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LANG_URL = 
r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - # Listed in order of priority for the -b option - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] + # Listed in order of quality + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -812,124 +812,109 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - - # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() video_id = mobj.group(2) - # Downloader parameters - best_quality = False - all_formats = False - format_param = None - quality_index = 0 - if self._downloader is not None: - params = self._downloader.params - format_param = params.get('format', None) - if format_param == '0': - format_limit = params.get('format_limit', None) - if format_limit is not None: - try: - # Start at a different format if the user has limited the maximum quality - quality_index = self._available_formats.index(format_limit) - except ValueError: - pass - format_param = self._available_formats[quality_index] - best_quality = True - elif format_param == '-1': - format_param = self._available_formats[quality_index] - all_formats = True - - while True: - # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return - # Get video webpage - self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + + # Get video info + self.report_video_info_webpage_download(video_id) + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + request = urllib2.Request(video_info_url, None, std_headers) try: - video_webpage = urllib2.urlopen(request).read() + video_info_webpage = urllib2.urlopen(request).read() + video_info = parse_qs(video_info_webpage) + if 'token' in video_info: + break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + self.report_information_extraction(video_id) - # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + # uploader + if 'author' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = urllib.unquote_plus(video_info['author'][0]) + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote_plus(video_info['title'][0]) + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % 
simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail_url' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # description + video_description = 'No description available.' + if self._downloader.params.get('forcedescription', False): + mobj = re.search(r'', video_webpage) if mobj is not None: - player_url = mobj.group(1) + video_description = mobj.group(1) + + # Decide which formats to download + if 'fmt_url_map' in video_info: + url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + format_limit = self._downloader.params.get('format_limit', None) + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] else: - player_url = None - - # Get video info - self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) - try: - video_info_webpage = urllib2.urlopen(request).read() - video_info = parse_qs(video_info_webpage) - if 'token' in video_info: - break - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - if 'token' not in video_info: - # Attempt to see if YouTube has issued an error message - if 'reason' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') - stream = open('reportme-ydl-%s.dat' % 
time.time(), 'wb') - stream.write(video_info_webpage) - stream.close() - else: - reason = urllib.unquote_plus(video_info['reason'][0]) - self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') return - token = urllib.unquote_plus(video_info['token'][0]) - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - - # Check possible RTMP download - if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_real_url = video_info['conn'][0] - - # uploader - if 'author' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = urllib.unquote_plus(video_info['author'][0]) + requested_format = self._downloader.params.get('format', None) + if requested_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif requested_format == '-1': + video_url_list = url_map.items() # All formats + else: + if requested_format not in existing_formats: + self._downloader.trouble(u'ERROR: format not available for video') + return + video_url_list = [(requested_format, url_map[requested_format])] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + else: + self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + return - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = 
urllib.unquote_plus(video_info['title'][0]) - video_title = video_title.decode('utf-8') - video_title = sanitize_title(video_title) - - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') - - # thumbnail image - if 'thumbnail_url' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: # don't panic if we can't find it - video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) - - # description - video_description = 'No description available.' - if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + for format_param, video_real_url in video_url_list: + # At this point we have a new video + self._downloader.increment_downloads() + + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -944,32 +929,8 @@ class YoutubeIE(InfoExtractor): 'description': video_description.decode('utf-8'), 'player_url': player_url, }) - - if all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # None left to get - return - else: - format_param = self._available_formats[quality_index] - continue - return - - except UnavailableFormatError, err: - if best_quality or all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # I don't ever expect this to happen - if not all_formats: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - else: - self.report_unavailable_format(video_id, format_param) - format_param = self._available_formats[quality_index] - continue - else: - self._downloader.trouble('ERROR: format not available for video') - return + except UnavailableVideoError, 
err: + self._downloader.trouble(u'ERROR: unable to download video') class MetacafeIE(InfoExtractor): @@ -1043,8 +1004,7 @@ class MetacafeIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1101,8 +1061,8 @@ class MetacafeIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1136,8 +1096,7 @@ class DailymotionIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') @@ -1190,8 +1149,8 @@ class DailymotionIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1224,8 +1183,7 @@ class GoogleIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1300,8 +1258,8 @@ class GoogleIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1335,8 +1293,7 @@ class 
PhotobucketIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1382,8 +1339,8 @@ class PhotobucketIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1420,8 +1377,7 @@ class YahooIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None and new_video: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1540,8 +1496,8 @@ class YahooIE(InfoExtractor): 'description': video_description, 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1568,8 +1524,7 @@ class GenericIE(InfoExtractor): def _real_extract(self, url): # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = url.split('/')[-1] request = urllib2.Request(url) @@ -1640,8 +1595,8 @@ class GenericIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): @@ -2109,7 +2064,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.06.06', + version='2010.07.22', 
conflict_handler='resolve', ) @@ -2138,16 +2093,12 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-b', '--best-quality', - action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', action='store_const', dest='format', help='alias for -f 17', const='17') - video_format.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') diff --git a/youtube-dl.spec b/youtube-dl.spec index fd32319..410ed8b 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.14 +Version: 2010.07.22 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +- Update to latest release + * Thu Jul 15 2010 Till Maas - 2010.07.14-1 - Update to latest release From 69c8e8dbb612dacf39fcc72e409c8e8fe9fc67c2 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:13:45 +0000 Subject: [PATCH 023/279] - Update to latest release --- Home | 52 ++++----- youtube-dl | 289 ++++++++++++++++++++---------------------------- youtube-dl.spec | 5 +- 
3 files changed, 147 insertions(+), 199 deletions(-) diff --git a/Home b/Home index cf1d5c0..aa132b1 100644 --- a/Home +++ b/Home @@ -105,7 +105,7 @@ date.setTime(date.getTime() + (365 * 24 * 60 * 60 * 1000)); var cookieoptions = { path: '/', expires: date }; - window._shard = 'fe01 (ID 1)'; + window._shard = 'bfg02-gunicorn (ID 7)'; $(document).ready(function(){ $('#toggle-repo-content').click(function(){ @@ -177,7 +177,7 @@
          • - Issues (12) » + Issues (17) » @@ -235,9 +235,9 @@ @@ -253,7 +253,7 @@ @@ -263,9 +263,11 @@
              -
            • tip
            • +
            • tip
            • -
            • 2010.07.14
            • +
            • 2010.07.22
            • + +
            • 2010.07.14
            • 2010.06.06
            • @@ -293,8 +295,6 @@
            • 2009.06.29
            • -
            • 2009.05.30
            • -
            • 2009.05.25
            • 2009.05.23
            • @@ -348,9 +348,9 @@ @@ -371,7 +371,7 @@

              youtube-dl is a small command-line program for downloading videos from YouTube.com.

              -
              Clone this repository (size: 287.0 KB): HTTPS / SSH
              +
              Clone this repository (size: 296.3 KB): HTTPS / SSH
              $ hg clone http://bitbucket.org/rg3/youtube-dl
              @@ -413,7 +413,7 @@

              youtube-dl: Download videos from YouTube.com

              (and more...)

              What is it?

              -

              youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

              +

              youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.22. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

              I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

              Thanks for all the feedback received so far. I'm glad people find my program useful.

              Usage instructions

              @@ -438,10 +438,10 @@

            Download it

            Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

            -

            2010.07.14

            -
            • MD5: 7808ec7e428d1b3c6a697fb63a51d401 -
            • SHA1: d1ee2a5aa27af89ea8b5d2f3c98fdfe2c75a9460 -
            • SHA256: 7c27bedbfb9ae00dcdc148211c5da612ce4229706b5e5df3f2bf1b263c058d9a +

              2010.07.22

              +
              • MD5: d3a69eb8211e0aa7b61e0afbfe507d41 +
              • SHA1: bafb1b4716ddaae506f10da3394ce7672d5f4e42 +
              • SHA256: 0307ec6b7b58ef26e66afd9e260aba314ce706831a9cbaef3cb65705a66289cd

              Proxy support

              youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

              @@ -469,12 +469,6 @@ -
              - - This revision is from 2010-07-14 18:38 - -
              - @@ -489,8 +483,8 @@ Django 1.2.1 / Piston 0.2.3rc1 / Hg 1.3.1 / - Python 2.5.2 / - r3065| fe01 + Python 2.7.0 / + r3099| bfg02 diff --git a/youtube-dl b/youtube-dl index e89c915..5fd331e 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -131,7 +131,7 @@ class PostProcessingError(Exception): """ pass -class UnavailableFormatError(Exception): +class UnavailableVideoError(Exception): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -401,7 +401,7 @@ class FileDownloader(object): try: self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableFormatError + raise UnavailableVideoError # Forced printings if self.params.get('forcetitle', False): @@ -435,7 +435,7 @@ class FileDownloader(object): try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: - raise UnavailableFormatError + raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return @@ -684,12 +684,12 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LANG_URL = 
r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - # Listed in order of priority for the -b option - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] + # Listed in order of quality + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -812,124 +812,109 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - - # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() video_id = mobj.group(2) - # Downloader parameters - best_quality = False - all_formats = False - format_param = None - quality_index = 0 - if self._downloader is not None: - params = self._downloader.params - format_param = params.get('format', None) - if format_param == '0': - format_limit = params.get('format_limit', None) - if format_limit is not None: - try: - # Start at a different format if the user has limited the maximum quality - quality_index = self._available_formats.index(format_limit) - except ValueError: - pass - format_param = self._available_formats[quality_index] - best_quality = True - elif format_param == '-1': - format_param = self._available_formats[quality_index] - all_formats = True - - while True: - # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return - # Get video webpage - self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + + # Get video info + self.report_video_info_webpage_download(video_id) + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + request = urllib2.Request(video_info_url, None, std_headers) try: - video_webpage = urllib2.urlopen(request).read() + video_info_webpage = urllib2.urlopen(request).read() + video_info = parse_qs(video_info_webpage) + if 'token' in video_info: + break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + self.report_information_extraction(video_id) - # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + # uploader + if 'author' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = urllib.unquote_plus(video_info['author'][0]) + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote_plus(video_info['title'][0]) + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % 
simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail_url' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # description + video_description = 'No description available.' + if self._downloader.params.get('forcedescription', False): + mobj = re.search(r'', video_webpage) if mobj is not None: - player_url = mobj.group(1) + video_description = mobj.group(1) + + # Decide which formats to download + if 'fmt_url_map' in video_info: + url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + format_limit = self._downloader.params.get('format_limit', None) + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] else: - player_url = None - - # Get video info - self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) - try: - video_info_webpage = urllib2.urlopen(request).read() - video_info = parse_qs(video_info_webpage) - if 'token' in video_info: - break - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - if 'token' not in video_info: - # Attempt to see if YouTube has issued an error message - if 'reason' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') - stream = open('reportme-ydl-%s.dat' % 
time.time(), 'wb') - stream.write(video_info_webpage) - stream.close() - else: - reason = urllib.unquote_plus(video_info['reason'][0]) - self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') return - token = urllib.unquote_plus(video_info['token'][0]) - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - - # Check possible RTMP download - if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_real_url = video_info['conn'][0] - - # uploader - if 'author' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = urllib.unquote_plus(video_info['author'][0]) + requested_format = self._downloader.params.get('format', None) + if requested_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif requested_format == '-1': + video_url_list = url_map.items() # All formats + else: + if requested_format not in existing_formats: + self._downloader.trouble(u'ERROR: format not available for video') + return + video_url_list = [(requested_format, url_map[requested_format])] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + else: + self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + return - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = 
urllib.unquote_plus(video_info['title'][0]) - video_title = video_title.decode('utf-8') - video_title = sanitize_title(video_title) - - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') - - # thumbnail image - if 'thumbnail_url' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: # don't panic if we can't find it - video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) - - # description - video_description = 'No description available.' - if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + for format_param, video_real_url in video_url_list: + # At this point we have a new video + self._downloader.increment_downloads() + + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -944,32 +929,8 @@ class YoutubeIE(InfoExtractor): 'description': video_description.decode('utf-8'), 'player_url': player_url, }) - - if all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # None left to get - return - else: - format_param = self._available_formats[quality_index] - continue - return - - except UnavailableFormatError, err: - if best_quality or all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # I don't ever expect this to happen - if not all_formats: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - else: - self.report_unavailable_format(video_id, format_param) - format_param = self._available_formats[quality_index] - continue - else: - self._downloader.trouble('ERROR: format not available for video') - return + except UnavailableVideoError, 
err: + self._downloader.trouble(u'ERROR: unable to download video') class MetacafeIE(InfoExtractor): @@ -1043,8 +1004,7 @@ class MetacafeIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1101,8 +1061,8 @@ class MetacafeIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1136,8 +1096,7 @@ class DailymotionIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') @@ -1190,8 +1149,8 @@ class DailymotionIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1224,8 +1183,7 @@ class GoogleIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1300,8 +1258,8 @@ class GoogleIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1335,8 +1293,7 @@ class 
PhotobucketIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1382,8 +1339,8 @@ class PhotobucketIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1420,8 +1377,7 @@ class YahooIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None and new_video: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1540,8 +1496,8 @@ class YahooIE(InfoExtractor): 'description': video_description, 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1568,8 +1524,7 @@ class GenericIE(InfoExtractor): def _real_extract(self, url): # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = url.split('/')[-1] request = urllib2.Request(url) @@ -1640,8 +1595,8 @@ class GenericIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): @@ -2109,7 +2064,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.06.06', + version='2010.07.22', 
conflict_handler='resolve', ) @@ -2138,16 +2093,12 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-b', '--best-quality', - action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', action='store_const', dest='format', help='alias for -f 17', const='17') - video_format.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') diff --git a/youtube-dl.spec b/youtube-dl.spec index fd32319..410ed8b 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.14 +Version: 2010.07.22 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +- Update to latest release + * Thu Jul 15 2010 Till Maas - 2010.07.14-1 - Update to latest release From 67f822011be59d61a47d187ed4139222b143e101 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:13:46 +0000 Subject: [PATCH 024/279] - Update to latest release --- Home | 52 ++++----- youtube-dl | 289 ++++++++++++++++++++---------------------------- youtube-dl.spec | 5 +- 
3 files changed, 147 insertions(+), 199 deletions(-) diff --git a/Home b/Home index cf1d5c0..aa132b1 100644 --- a/Home +++ b/Home @@ -105,7 +105,7 @@ date.setTime(date.getTime() + (365 * 24 * 60 * 60 * 1000)); var cookieoptions = { path: '/', expires: date }; - window._shard = 'fe01 (ID 1)'; + window._shard = 'bfg02-gunicorn (ID 7)'; $(document).ready(function(){ $('#toggle-repo-content').click(function(){ @@ -177,7 +177,7 @@
            • - Issues (12) » + Issues (17) » @@ -235,9 +235,9 @@ @@ -253,7 +253,7 @@ @@ -263,9 +263,11 @@
                -
              • tip
              • +
              • tip
              • -
              • 2010.07.14
              • +
              • 2010.07.22
              • + +
              • 2010.07.14
              • 2010.06.06
              • @@ -293,8 +295,6 @@
              • 2009.06.29
              • -
              • 2009.05.30
              • -
              • 2009.05.25
              • 2009.05.23
              • @@ -348,9 +348,9 @@ @@ -371,7 +371,7 @@

                youtube-dl is a small command-line program for downloading videos from YouTube.com.

                -
                Clone this repository (size: 287.0 KB): HTTPS / SSH
                +
                Clone this repository (size: 296.3 KB): HTTPS / SSH
                $ hg clone http://bitbucket.org/rg3/youtube-dl
                @@ -413,7 +413,7 @@

                youtube-dl: Download videos from YouTube.com

                (and more...)

                What is it?

                -

                youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

                +

                youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.22. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

                I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

                Thanks for all the feedback received so far. I'm glad people find my program useful.

                Usage instructions

                @@ -438,10 +438,10 @@

              Download it

              Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

              -

              2010.07.14

              -
              • MD5: 7808ec7e428d1b3c6a697fb63a51d401 -
              • SHA1: d1ee2a5aa27af89ea8b5d2f3c98fdfe2c75a9460 -
              • SHA256: 7c27bedbfb9ae00dcdc148211c5da612ce4229706b5e5df3f2bf1b263c058d9a +

                2010.07.22

                +
                • MD5: d3a69eb8211e0aa7b61e0afbfe507d41 +
                • SHA1: bafb1b4716ddaae506f10da3394ce7672d5f4e42 +
                • SHA256: 0307ec6b7b58ef26e66afd9e260aba314ce706831a9cbaef3cb65705a66289cd

                Proxy support

                youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

                @@ -469,12 +469,6 @@ -
                - - This revision is from 2010-07-14 18:38 - -
                - @@ -489,8 +483,8 @@ Django 1.2.1 / Piston 0.2.3rc1 / Hg 1.3.1 / - Python 2.5.2 / - r3065| fe01 + Python 2.7.0 / + r3099| bfg02 diff --git a/youtube-dl b/youtube-dl index e89c915..5fd331e 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -131,7 +131,7 @@ class PostProcessingError(Exception): """ pass -class UnavailableFormatError(Exception): +class UnavailableVideoError(Exception): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -401,7 +401,7 @@ class FileDownloader(object): try: self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableFormatError + raise UnavailableVideoError # Forced printings if self.params.get('forcetitle', False): @@ -435,7 +435,7 @@ class FileDownloader(object): try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: - raise UnavailableFormatError + raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return @@ -684,12 +684,12 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LANG_URL = 
r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - # Listed in order of priority for the -b option - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] + # Listed in order of quality + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -812,124 +812,109 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - - # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() video_id = mobj.group(2) - # Downloader parameters - best_quality = False - all_formats = False - format_param = None - quality_index = 0 - if self._downloader is not None: - params = self._downloader.params - format_param = params.get('format', None) - if format_param == '0': - format_limit = params.get('format_limit', None) - if format_limit is not None: - try: - # Start at a different format if the user has limited the maximum quality - quality_index = self._available_formats.index(format_limit) - except ValueError: - pass - format_param = self._available_formats[quality_index] - best_quality = True - elif format_param == '-1': - format_param = self._available_formats[quality_index] - all_formats = True - - while True: - # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return - # Get video webpage - self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + + # Get video info + self.report_video_info_webpage_download(video_id) + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + request = urllib2.Request(video_info_url, None, std_headers) try: - video_webpage = urllib2.urlopen(request).read() + video_info_webpage = urllib2.urlopen(request).read() + video_info = parse_qs(video_info_webpage) + if 'token' in video_info: + break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + self.report_information_extraction(video_id) - # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + # uploader + if 'author' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = urllib.unquote_plus(video_info['author'][0]) + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote_plus(video_info['title'][0]) + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % 
simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail_url' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # description + video_description = 'No description available.' + if self._downloader.params.get('forcedescription', False): + mobj = re.search(r'', video_webpage) if mobj is not None: - player_url = mobj.group(1) + video_description = mobj.group(1) + + # Decide which formats to download + if 'fmt_url_map' in video_info: + url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + format_limit = self._downloader.params.get('format_limit', None) + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] else: - player_url = None - - # Get video info - self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) - try: - video_info_webpage = urllib2.urlopen(request).read() - video_info = parse_qs(video_info_webpage) - if 'token' in video_info: - break - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - if 'token' not in video_info: - # Attempt to see if YouTube has issued an error message - if 'reason' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') - stream = open('reportme-ydl-%s.dat' % 
time.time(), 'wb') - stream.write(video_info_webpage) - stream.close() - else: - reason = urllib.unquote_plus(video_info['reason'][0]) - self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') return - token = urllib.unquote_plus(video_info['token'][0]) - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - - # Check possible RTMP download - if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_real_url = video_info['conn'][0] - - # uploader - if 'author' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = urllib.unquote_plus(video_info['author'][0]) + requested_format = self._downloader.params.get('format', None) + if requested_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif requested_format == '-1': + video_url_list = url_map.items() # All formats + else: + if requested_format not in existing_formats: + self._downloader.trouble(u'ERROR: format not available for video') + return + video_url_list = [(requested_format, url_map[requested_format])] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + else: + self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + return - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = 
urllib.unquote_plus(video_info['title'][0]) - video_title = video_title.decode('utf-8') - video_title = sanitize_title(video_title) - - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') - - # thumbnail image - if 'thumbnail_url' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: # don't panic if we can't find it - video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) - - # description - video_description = 'No description available.' - if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + for format_param, video_real_url in video_url_list: + # At this point we have a new video + self._downloader.increment_downloads() + + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -944,32 +929,8 @@ class YoutubeIE(InfoExtractor): 'description': video_description.decode('utf-8'), 'player_url': player_url, }) - - if all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # None left to get - return - else: - format_param = self._available_formats[quality_index] - continue - return - - except UnavailableFormatError, err: - if best_quality or all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # I don't ever expect this to happen - if not all_formats: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - else: - self.report_unavailable_format(video_id, format_param) - format_param = self._available_formats[quality_index] - continue - else: - self._downloader.trouble('ERROR: format not available for video') - return + except UnavailableVideoError, 
err: + self._downloader.trouble(u'ERROR: unable to download video') class MetacafeIE(InfoExtractor): @@ -1043,8 +1004,7 @@ class MetacafeIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1101,8 +1061,8 @@ class MetacafeIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1136,8 +1096,7 @@ class DailymotionIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') @@ -1190,8 +1149,8 @@ class DailymotionIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1224,8 +1183,7 @@ class GoogleIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1300,8 +1258,8 @@ class GoogleIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1335,8 +1293,7 @@ class 
PhotobucketIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1382,8 +1339,8 @@ class PhotobucketIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1420,8 +1377,7 @@ class YahooIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None and new_video: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1540,8 +1496,8 @@ class YahooIE(InfoExtractor): 'description': video_description, 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1568,8 +1524,7 @@ class GenericIE(InfoExtractor): def _real_extract(self, url): # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = url.split('/')[-1] request = urllib2.Request(url) @@ -1640,8 +1595,8 @@ class GenericIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): @@ -2109,7 +2064,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.06.06', + version='2010.07.22', 
conflict_handler='resolve', ) @@ -2138,16 +2093,12 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-b', '--best-quality', - action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', action='store_const', dest='format', help='alias for -f 17', const='17') - video_format.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') diff --git a/youtube-dl.spec b/youtube-dl.spec index fd32319..410ed8b 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.14 +Version: 2010.07.22 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +- Update to latest release + * Thu Jul 15 2010 Till Maas - 2010.07.14-1 - Update to latest release From 271285f7e7520d2f4607b2b6aea5327ad4d0d17b Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:14:44 +0000 Subject: [PATCH 025/279] fix version in changelog --- youtube-dl.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl.spec 
b/youtube-dl.spec index 410ed8b..ac85c2f 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -39,7 +39,7 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog -* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +* Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release * Thu Jul 15 2010 Till Maas - 2010.07.14-1 From 4094eb9005c6f29ec06824216ab82bfc9b9d51d4 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:14:44 +0000 Subject: [PATCH 026/279] fix version in changelog --- youtube-dl.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 410ed8b..ac85c2f 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -39,7 +39,7 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog -* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +* Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release * Thu Jul 15 2010 Till Maas - 2010.07.14-1 From 52d84f44c04aa85cfa81573a3a18c468f39bd6c9 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:14:44 +0000 Subject: [PATCH 027/279] fix version in changelog --- youtube-dl.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 410ed8b..ac85c2f 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -39,7 +39,7 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog -* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +* Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release * Thu Jul 15 2010 Till Maas - 2010.07.14-1 From d4285850f8f9a8df0524dd93f76437f633636fb6 Mon Sep 17 00:00:00 2001 From: Fedora Release Engineering Date: Thu, 29 Jul 2010 16:21:53 +0000 Subject: [PATCH 028/279] dist-git conversion --- .cvsignore => .gitignore | 0 Makefile | 21 --------------------- branch | 1 - 3 files changed, 22 deletions(-) rename .cvsignore => .gitignore (100%) delete mode 100644 Makefile delete mode 100644 branch diff --git a/.cvsignore b/.gitignore similarity index 100% rename from .cvsignore rename to 
.gitignore diff --git a/Makefile b/Makefile deleted file mode 100644 index e163bdb..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile for source rpm: youtube-dl -# $Id: Makefile,v 1.1 2008/01/25 19:47:50 kevin Exp $ -NAME := youtube-dl -SPECFILE = $(firstword $(wildcard *.spec)) - -define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done -endef - -MAKEFILE_COMMON := $(shell $(find-makefile-common)) - -ifeq ($(MAKEFILE_COMMON),) -# attept a checkout -define checkout-makefile-common -test -f CVS/Root && { cvs -Q -d $$(cat CVS/Root) checkout common && echo "common/Makefile.common" ; } || { echo "ERROR: I can't figure out how to checkout the 'common' module." ; exit -1 ; } >&2 -endef - -MAKEFILE_COMMON := $(shell $(checkout-makefile-common)) -endif - -include $(MAKEFILE_COMMON) diff --git a/branch b/branch deleted file mode 100644 index baa94ef..0000000 --- a/branch +++ /dev/null @@ -1 +0,0 @@ -F-13 From 9f6a81382ad5a1e1b32b98ca8807e4698feed8f9 Mon Sep 17 00:00:00 2001 From: Fedora Release Engineering Date: Thu, 29 Jul 2010 16:21:53 +0000 Subject: [PATCH 029/279] dist-git conversion --- .cvsignore => .gitignore | 0 Makefile | 21 --------------------- branch | 1 - 3 files changed, 22 deletions(-) rename .cvsignore => .gitignore (100%) delete mode 100644 Makefile delete mode 100644 branch diff --git a/.cvsignore b/.gitignore similarity index 100% rename from .cvsignore rename to .gitignore diff --git a/Makefile b/Makefile deleted file mode 100644 index e163bdb..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile for source rpm: youtube-dl -# $Id: Makefile,v 1.1 2008/01/25 19:47:50 kevin Exp $ -NAME := youtube-dl -SPECFILE = $(firstword $(wildcard *.spec)) - -define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] 
; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done -endef - -MAKEFILE_COMMON := $(shell $(find-makefile-common)) - -ifeq ($(MAKEFILE_COMMON),) -# attept a checkout -define checkout-makefile-common -test -f CVS/Root && { cvs -Q -d $$(cat CVS/Root) checkout common && echo "common/Makefile.common" ; } || { echo "ERROR: I can't figure out how to checkout the 'common' module." ; exit -1 ; } >&2 -endef - -MAKEFILE_COMMON := $(shell $(checkout-makefile-common)) -endif - -include $(MAKEFILE_COMMON) diff --git a/branch b/branch deleted file mode 100644 index 06de2d2..0000000 --- a/branch +++ /dev/null @@ -1 +0,0 @@ -F-12 From 12dd18b544cd3ac5b910f89f7763ae245a697821 Mon Sep 17 00:00:00 2001 From: Fedora Release Engineering Date: Thu, 29 Jul 2010 16:21:55 +0000 Subject: [PATCH 030/279] dist-git conversion --- .cvsignore => .gitignore | 0 Makefile | 21 --------------------- 2 files changed, 21 deletions(-) rename .cvsignore => .gitignore (100%) delete mode 100644 Makefile diff --git a/.cvsignore b/.gitignore similarity index 100% rename from .cvsignore rename to .gitignore diff --git a/Makefile b/Makefile deleted file mode 100644 index e163bdb..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile for source rpm: youtube-dl -# $Id: Makefile,v 1.1 2008/01/25 19:47:50 kevin Exp $ -NAME := youtube-dl -SPECFILE = $(firstword $(wildcard *.spec)) - -define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done -endef - -MAKEFILE_COMMON := $(shell $(find-makefile-common)) - -ifeq ($(MAKEFILE_COMMON),) -# attept a checkout -define checkout-makefile-common -test -f CVS/Root && { cvs -Q -d $$(cat CVS/Root) checkout common && echo "common/Makefile.common" ; } || { echo "ERROR: I can't figure out how to 
checkout the 'common' module." ; exit -1 ; } >&2 -endef - -MAKEFILE_COMMON := $(shell $(checkout-makefile-common)) -endif - -include $(MAKEFILE_COMMON) From b157c3b2212dcedd9c1853b66a8185837ee6abeb Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 5 Aug 2010 13:40:16 +0200 Subject: [PATCH 031/279] Update to latest release remove unused index.html --- Home | 54 ++--- index.html | 527 ------------------------------------------------ youtube-dl | 147 +++++++++----- youtube-dl.spec | 5 +- 4 files changed, 123 insertions(+), 610 deletions(-) delete mode 100644 index.html diff --git a/Home b/Home index aa132b1..e7bce9a 100644 --- a/Home +++ b/Home @@ -53,7 +53,7 @@ - + - - - - - - - - - - - - - - - -
                -
                - -
                -
                - - - - - - - - - - - - - - - - - - -
                - -
                - -
                - - - -
                - -
                - - -
                -
                -
                - - - - -

                - rg3 / - youtube-dl - (http://bitbucket.org/rg3/youtube-dl/wiki/) - -

                - - - - - -

                youtube-dl is a small command-line program for downloading videos from YouTube.com.

                - -
                Clone this repository (size: 255.5 KB): HTTPS / SSH
                -
                $ hg clone http://bitbucket.org/rg3/youtube-dl
                - -
                - -
                - - - - -
                - - - - - - - -
                - -
                -
                - - - -
                -

                

                -

                youtube-dl: Download videos from YouTube.com

                -

                (and more...)

                -

                What is it?

                -

                youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

                -

                I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

                -

                Thanks for all the feedback received so far. I'm glad people find my program useful.

                -

                Usage instructions

                -

                In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

                -

                In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

                -

                After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

                -

                If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

                -

                More usage tips

                -
                • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
                • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
                • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
                • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
                • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
                • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
                • In a similar line, the -e or --get-title option tells the program to print the video title. -
                • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
                • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
                • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
                • The -m or --mobile-version option is an alias for -f 17. -
                • The -d or --high-def option is an alias for -f 22. -
                • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
                • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
                • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
                • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
                • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
                • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
                • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
                • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
                • For usage instructions, use youtube-dl -h or youtube-dl --help. -
                • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. -
                -

                Download it

                -

                Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

                -

                2010.04.04

                -
                • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
                • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
                • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 -
                -

                Output template

                -

                The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

                -
                • id: The sequence will be replaced by the video identifier. -
                • url: The sequence will be replaced by the video URL. -
                • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. -
                • title: The sequence will be replaced by the literal video title. -
                • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. -
                • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). -
                • epoch: The sequence will be replaced by the Unix epoch when creating the file. -
                • ord: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. -
                -

                As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

                -

                Authors

                -
                • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. -
                • Danny Colligan: YouTube search InfoExtractor, ideas and patches. -
                • Benjamin Johnson: Google Video InfoExtractor, Photobucket InfoExtractor, Yahoo! Video InfoExtractor, generic InfoExtractor, ideas, patches, etc. -
                • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. -
                -

                Copyright © 2006-2010 Ricardo Garcia Gonzalez

                - - - -
                - - -
                -
                - -
                - - - diff --git a/youtube-dl b/youtube-dl index 5fd331e..d546949 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,50 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case 
of HTTP error 503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < long(content_length) < resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
+ self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -844,6 +845,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader @@ -878,7 +887,13 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download + requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) + if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) @@ -890,19 +905,17 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = 
[(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return @@ -930,7 +943,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): @@ -1026,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + #self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: @@ -1928,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + 
playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1983,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2064,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.08.04', conflict_handler='resolve', ) @@ -2080,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2099,6 +2124,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2151,6 +2178,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2171,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2212,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) 
fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index ac85c2f..0e3e705 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.22 +Version: 2010.08.04 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Aug 05 2010 Till Maas - 2010.08.04-1 +- Update to latest release + * Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release From 5cb95ac1b8c0cb4b66870ab1cb032ed07cff1ee0 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 5 Aug 2010 13:40:16 +0200 Subject: [PATCH 032/279] Update to latest release remove unused index.html --- Home | 54 ++--- index.html | 527 ------------------------------------------------ youtube-dl | 147 +++++++++----- youtube-dl.spec | 5 +- 4 files changed, 123 insertions(+), 610 deletions(-) delete mode 100644 index.html diff --git a/Home b/Home index aa132b1..e7bce9a 100644 --- a/Home +++ b/Home @@ -53,7 +53,7 @@ - + - - - - - - - - - - - - - - - -
                -
                - -
                -
                - - - - - - - - - - - - - - - - - - -
                - -
                - -
                - - - -
                - -
                - - -
                -
                -
                - - - - -

                - rg3 / - youtube-dl - (http://bitbucket.org/rg3/youtube-dl/wiki/) - -

                - - - - - -

                youtube-dl is a small command-line program for downloading videos from YouTube.com.

                - -
                Clone this repository (size: 255.5 KB): HTTPS / SSH
                -
                $ hg clone http://bitbucket.org/rg3/youtube-dl
                - -
                - -
                - - - - -
                - - - - - - - -
                - -
                -
                - - - -
                -

                

                -

                youtube-dl: Download videos from YouTube.com

                -

                (and more...)

                -

                What is it?

                -

                youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

                -

                I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

                -

                Thanks for all the feedback received so far. I'm glad people find my program useful.

                -

                Usage instructions

                -

                In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

                -

                In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

                -

                After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

                -

                If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

                -

                More usage tips

                -
                • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
                • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
                • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
                • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
                • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
                • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
                • In a similar line, the -e or --get-title option tells the program to print the video title. -
                • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
                • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
                • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
                • The -m or --mobile-version option is an alias for -f 17. -
                • The -d or --high-def option is an alias for -f 22. -
                • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
                • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
                • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
                • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
                • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
                • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
                • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
                • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
                • For usage instructions, use youtube-dl -h or youtube-dl --help. -
                • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. -
                -

                Download it

                -

                Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

                -

                2010.04.04

                -
                • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
                • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
                • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 -
                -

                Output template

                -

                The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

                -
                • id: The sequence will be replaced by the video identifier. -
                • url: The sequence will be replaced by the video URL. -
                • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. -
                • title: The sequence will be replaced by the literal video title. -
                • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. -
                • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). -
                • epoch: The sequence will be replaced by the Unix epoch when creating the file. -
                • ord: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. -
                -

                As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

                -

                Authors

                -
                • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. -
                • Danny Colligan: YouTube search InfoExtractor, ideas and patches. -
                • Benjamin Johnson: Google Video InfoExtractor, Photobucket InfoExtractor, Yahoo! Video InfoExtractor, generic InfoExtractor, ideas, patches, etc. -
                • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. -
                -

                Copyright © 2006-2010 Ricardo Garcia Gonzalez

                - - - -
                - - -
                -
                - -
                - - - diff --git a/youtube-dl b/youtube-dl index 5fd331e..d546949 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,50 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case 
of HTTP error 503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < long(content_length) < resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
+ self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -844,6 +845,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader @@ -878,7 +887,13 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download + requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) + if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) @@ -890,19 +905,17 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = 
[(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return @@ -930,7 +943,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): @@ -1026,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + #self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: @@ -1928,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + 
playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1983,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2064,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.08.04', conflict_handler='resolve', ) @@ -2080,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2099,6 +2124,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2151,6 +2178,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2171,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2212,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) 
fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index ac85c2f..0e3e705 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.22 +Version: 2010.08.04 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Aug 05 2010 Till Maas - 2010.08.04-1 +- Update to latest release + * Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release From 5e7d1a098c8cc7e7226b4f762b641d944a2eb92f Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 5 Aug 2010 13:40:16 +0200 Subject: [PATCH 033/279] Update to latest release remove unused index.html --- Home | 54 ++--- index.html | 527 ------------------------------------------------ youtube-dl | 147 +++++++++----- youtube-dl.spec | 5 +- 4 files changed, 123 insertions(+), 610 deletions(-) delete mode 100644 index.html diff --git a/Home b/Home index aa132b1..e7bce9a 100644 --- a/Home +++ b/Home @@ -53,7 +53,7 @@ - + - - - - - - - - - - - - - - - -
                -
                - -
                -
                - - - - - - - - - - - - - - - - - - -
                - -
                - -
                - - - -
                - -
                - - -
                -
                -
                - - - - -

                - rg3 / - youtube-dl - (http://bitbucket.org/rg3/youtube-dl/wiki/) - -

                - - - - - -

                youtube-dl is a small command-line program for downloading videos from YouTube.com.

                - -
                Clone this repository (size: 255.5 KB): HTTPS / SSH
                -
                $ hg clone http://bitbucket.org/rg3/youtube-dl
                - -
                - -
                - - - - -
                - - - - - - - -
                - -
                -
                - - - -
                -

                

                -

                youtube-dl: Download videos from YouTube.com

                -

                (and more...)

                -

                What is it?

                -

                youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

                -

                I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

                -

                Thanks for all the feedback received so far. I'm glad people find my program useful.

                -

                Usage instructions

                -

                In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

                -

                In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

                -

                After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

                -

                If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

                -

                More usage tips

                -
                • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
                • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
                • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
                • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
                • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
                • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
                • In a similar line, the -e or --get-title option tells the program to print the video title. -
                • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
                • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
                • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
                • The -m or --mobile-version option is an alias for -f 17. -
                • The -d or --high-def option is an alias for -f 22. -
                • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
                • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
                • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
                • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
                • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
                • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
                • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
                • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
                • For usage instructions, use youtube-dl -h or youtube-dl --help. -
                • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. -
                -

                Download it

                -

                Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

                -

                2010.04.04

                -
                • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
                • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
                • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 -
                -

                Output template

                -

                The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

                -
                • id: The sequence will be replaced by the video identifier. -
                • url: The sequence will be replaced by the video URL. -
                • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. -
                • title: The sequence will be replaced by the literal video title. -
                • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. -
                • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). -
                • epoch: The sequence will be replaced by the Unix epoch when creating the file. -
                • ord: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. -
                -

                As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

                -

                Authors

                -
                • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. -
                • Danny Colligan: YouTube search InfoExtractor, ideas and patches. -
                • Benjamin Johnson: Google Video InfoExtractor, Photobucket InfoExtractor, Yahoo! Video InfoExtractor, generic InfoExtractor, ideas, patches, etc. -
                • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. -
                -

                Copyright © 2006-2010 Ricardo Garcia Gonzalez

                - - - -
                - - -
                -
                - -
                - - - diff --git a/youtube-dl b/youtube-dl index 5fd331e..d546949 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,50 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case 
of HTTP error 503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < long(content_length) < resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
+ self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -844,6 +845,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader @@ -878,7 +887,13 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download + requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) + if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) @@ -890,19 +905,17 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = 
[(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return @@ -930,7 +943,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): @@ -1026,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + #self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: @@ -1928,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + 
playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1983,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2064,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.08.04', conflict_handler='resolve', ) @@ -2080,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2099,6 +2124,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2151,6 +2178,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2171,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2212,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) 
fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index ac85c2f..0e3e705 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.22 +Version: 2010.08.04 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Aug 05 2010 Till Maas - 2010.08.04-1 +- Update to latest release + * Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release From b3e790cc2618e740c598b4243bf0727f85f64658 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 5 Aug 2010 13:40:16 +0200 Subject: [PATCH 034/279] Update to latest release remove unused index.html --- Home | 54 ++--- index.html | 527 ------------------------------------------------ youtube-dl | 147 +++++++++----- youtube-dl.spec | 5 +- 4 files changed, 123 insertions(+), 610 deletions(-) delete mode 100644 index.html diff --git a/Home b/Home index aa132b1..e7bce9a 100644 --- a/Home +++ b/Home @@ -53,7 +53,7 @@ - + - - - - - - - - - - - - - - - -
                -
                - -
                -
                - - - - - - - - - - - - - - - - - - -
                - -
                - -
                - - - -
                - -
                - - -
                -
                -
                - - - - -

                - rg3 / - youtube-dl - (http://bitbucket.org/rg3/youtube-dl/wiki/) - -

                - - - - - -

                youtube-dl is a small command-line program for downloading videos from YouTube.com.

                - -
                Clone this repository (size: 255.5 KB): HTTPS / SSH
                -
                $ hg clone http://bitbucket.org/rg3/youtube-dl
                - -
                - -
                - - - - -
                - - - - - - - -
                - -
                -
                - - - -
                -

                

                -

                youtube-dl: Download videos from YouTube.com

                -

                (and more...)

                -

                What is it?

                -

                youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

                -

                I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

                -

                Thanks for all the feedback received so far. I'm glad people find my program useful.

                -

                Usage instructions

                -

                In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

                -

                In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

                -

                After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

                -

                If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

                -

                More usage tips

                -
                • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
                • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
                • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
                • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
                • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
                • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
                • In a similar line, the -e or --get-title option tells the program to print the video title. -
                • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
                • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
                • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
                • The -m or --mobile-version option is an alias for -f 17. -
                • The -d or --high-def option is an alias for -f 22. -
                • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
                • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
                • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
                • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
                • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
                • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
                • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
                • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
                • For usage instructions, use youtube-dl -h or youtube-dl --help. -
                • You can cancel the program at any time by pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. -
                -

                Download it

                -

                Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

                -

                2010.04.04

                -
                • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
                • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
                • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 -
                -

                Output template

                -

                The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

                -
                • id: The sequence will be replaced by the video identifier. -
                • url: The sequence will be replaced by the video URL. -
                • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. -
                • title: The sequence will be replaced by the literal video title. -
                • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. -
                • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). -
                • epoch: The sequence will be replaced by the Unix epoch when creating the file. -
                • ord: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. -
                -

                As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

                -

                Authors

                -
                • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. -
                • Danny Colligan: YouTube search InfoExtractor, ideas and patches. -
                • Benjamin Johnson: Google Video InfoExtractor, Photobucket InfoExtractor, Yahoo! Video InfoExtractor, generic InfoExtractor, ideas, patches, etc. -
                • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. -
                -

                Copyright © 2006-2010 Ricardo Garcia Gonzalez

                - - - -
                - - -
                -
                - -
                - - - diff --git a/youtube-dl b/youtube-dl index 5fd331e..d546949 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,50 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case 
of HTTP error 503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < long(content_length) < resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
+ self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -844,6 +845,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader @@ -878,7 +887,13 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download + requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) + if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) @@ -890,19 +905,17 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = 
[(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return @@ -930,7 +943,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): @@ -1026,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + #self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: @@ -1928,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + 
playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1983,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2064,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.08.04', conflict_handler='resolve', ) @@ -2080,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2099,6 +2124,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2151,6 +2178,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2171,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2212,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) 
fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index ac85c2f..0e3e705 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.22 +Version: 2010.08.04 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Aug 05 2010 Till Maas - 2010.08.04-1 +- Update to latest release + * Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release From 86328827ba135faa6effe01c5a6fec7609b75294 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 7 Oct 2010 21:42:04 +0200 Subject: [PATCH 035/279] Update to latest release --- Home | 219 ++++++++++++++++++++++++++++-------------------- youtube-dl | 82 ++++++++++-------- youtube-dl.spec | 5 +- 3 files changed, 180 insertions(+), 126 deletions(-) diff --git a/Home b/Home index e7bce9a..af1294a 100644 --- a/Home +++ b/Home @@ -1,25 +1,39 @@ - - + + - rg3 / youtube-dl / wiki / Home — bitbucket.org + rg3 / youtube-dl / wiki / Home — Bitbucket - - - - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                - - -
                - -
                -
                - - - - - - - - - - - - -
                - -
                - -
                - - - -
                - -
                - - -
                -
                -
                - - - - -

                - rg3 / - youtube-dl - (http://bitbucket.org/rg3/youtube-dl/wiki/) - -

                - - - - - -

                youtube-dl is a small command-line program for downloading videos from YouTube.com.

                - -
                Clone this repository (size: 341.1 KB): HTTPS / SSH
                -
                $ hg clone http://bitbucket.org/rg3/youtube-dl
                - -
                - -
                - - - - -
                - - - - - - - -
                - -
                -
                - - - -
                -

                

                -

                youtube-dl: Download videos from YouTube.com

                -

                (and more...)

                -

                What is it?

                -

                youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work on your Unix box, on Windows or on Mac OS X. The latest version is 2010.10.03. It's released into the public domain, which means you can modify it, redistribute it or use it however you like.

                -

                I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you notice that it has stopped working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

                -

                Thanks for all the feedback received so far. I'm glad people find my program useful.

                -

                Usage instructions

                -

                In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

                -

                In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

                -

                After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are usually in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

                -

                If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

                -

                More usage tips

                -

                The program is usually invoked as youtube-dl followed by options and the video URLs. Listing all the options here would make this text too long, so you can run youtube-dl --help and get a summary of them. From that point on you can start experimenting with the different options yourself. The most common ones are -t (or -l) to include the video title in the file name, and either -f or -b to download a high quality version of the video. Also, the -o option can specify the output file name and path. It allows special character sequences that can be used as templates to be replaced. See the "Output template" section for more details.

                -

                Download it

                -

                Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

                -

                2010.10.03

                -
                • MD5: c552c00881845689900b67525ac6f72b -
                • SHA1: 436006432018fca75298e63742bb2c13feb42f98 -
                • SHA256: eb6a5deb494e8949918ca5a674d961439963fadd822df67dfa49ce47fa326d75 -
                -

                FAQ

                -

                The Frequently Asked Questions page contains answers to some common questions that pop up in my e-mail and in the issue tracker. Be sure to check it before reporting problems.

                -

                Supported sites

                -
                • YouTube.com. -
                • YouTube.com playlists (playlist URLs in "view_play_list" form). -
                • YouTube.com searches, using the special keyword "ytsearch" as a form of URL, as in "ytsearch:cute kittens". Do not forget the quotes if you want to include spaces in your search. Other variants are "ytsearchN" to download more than the first result, with N being a number, and "ytsearchall". -
                • metacafe.com. -
                • Google Video. -
                • Google Video searches ("gvsearch" keyword). -
                • Photobucket videos. -
                • Yahoo! video. -
                • Yahoo! video searches ("ybsearch" keyword). -
                • Dailymotion. -
                • A generic downloader that works in some sites. -
                -

                Proxy support

                -

                youtube-dl supports downloading videos through a proxy, by setting the http_proxy environment variable to the proxy URL, as in http://proxy_machine_name:port/.

                -

                YouTube formats

                -

                Using the -f option and other related options, you can specify the video format to be downloaded from YouTube. Instead of keeping a video format table here, I will refer you to the list of YouTube formats on Wikipedia.

                -

                Output template

                -

                The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, as in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

                -
                • id: The sequence will be replaced by the video identifier. -
                • url: The sequence will be replaced by the video URL. -
                • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. -
                • title: The sequence will be replaced by the literal video title. -
                • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. -
                • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). -
                • epoch: The sequence will be replaced by the Unix epoch when creating the file. -
                • ord: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. -
                -

                As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

                -

                Authors

                -
                • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. -
                • Danny Colligan: YouTube search InfoExtractor, ideas and patches. -
                • Benjamin Johnson: Google Video InfoExtractor, Photobucket InfoExtractor, Yahoo! Video InfoExtractor, generic InfoExtractor, ideas, patches, etc. -
                • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. -
                -

                Copyright © 2006-2010 Ricardo Garcia Gonzalez

                - - - -
                - - -
                - - - -
                - We run - Django 1.2.3 / - Piston 0.2.3rc1 / - Hg 1.6 / - Python 2.7.0 / - r3577 | bitbucket01 - -
                - -
                -
                - - - - - - - - - - - - - - diff --git a/youtube-dl.spec b/youtube-dl.spec index a93424f..9c3a358 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,13 +1,12 @@ Name: youtube-dl -Version: 2010.10.03 +Version: 2010.10.24 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia License: Public Domain -URL: http://bitbucket.org/rg3/youtube-dl -Source0: http://bitbucket.org/rg3/youtube-dl/raw/%{version}/youtube-dl -Source1: http://bitbucket.org/rg3/youtube-dl/wiki/Home +URL: http://rg3.github.com/youtube-dl/ +Source0: http://github.com/rg3/youtube-dl/raw/%{version}/youtube-dl BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildArch: noarch Requires: python >= 2.4 @@ -20,7 +19,7 @@ youtube-dl to mały tekstowy program służący do pobierania filmów z youtube.com. %prep -install -p -m0644 %{SOURCE1} index.html +#nothing to prep %build #nothing to build @@ -28,7 +27,7 @@ install -p -m0644 %{SOURCE1} index.html %install rm -rf $RPM_BUILD_ROOT mkdir -p $RPM_BUILD_ROOT%{_bindir} -install -m 755 %{SOURCE0} $RPM_BUILD_ROOT%{_bindir} +install -p -m 755 %{SOURCE0} $RPM_BUILD_ROOT%{_bindir} %clean rm -rf $RPM_BUILD_ROOT @@ -36,9 +35,14 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root,-) %{_bindir}/%{name} -%doc index.html %changelog +* Sat Nov 06 2010 Till Maas - 2010.10.24-1 +- Update to latest release +- Adjust to new upstream location at github instead of bitbucket +- add -p to install +- remove index.html + * Thu Oct 07 2010 Till Maas - 2010.10.03-1 - Update to latest release From 817631c3438d2232b10d97ba34d948255c70896f Mon Sep 17 00:00:00 2001 From: Till Maas Date: Sun, 12 Dec 2010 22:15:53 +0100 Subject: [PATCH 037/279] - Update to latest release to adjust with youtube changes --- youtube-dl | 389 +++++++++++++++++++++++++++++++++++------------- youtube-dl.spec | 5 +- 2 files changed, 288 
insertions(+), 106 deletions(-) diff --git a/youtube-dl b/youtube-dl index 51344f2..a8e3bd3 100644 --- a/youtube-dl +++ b/youtube-dl @@ -3,7 +3,10 @@ # Author: Ricardo Garcia Gonzalez # Author: Danny Colligan # Author: Benjamin Johnson +# Author: Vasyl' Vavrychuk # License: Public domain code +import cookielib +import datetime import htmlentitydefs import httplib import locale @@ -27,7 +30,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -94,6 +97,9 @@ def sanitize_open(filename, open_mode): """ try: if filename == u'-': + if sys.platform == 'win32': + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) return (sys.stdout, filename) stream = open(filename, open_mode) return (stream, filename) @@ -105,7 +111,6 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) - class DownloadError(Exception): """Download Error exception. @@ -181,22 +186,27 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. 
- retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + logtostderr: Log messages to stderr instead of stdout. 
""" params = None @@ -204,6 +214,7 @@ class FileDownloader(object): _pps = [] _download_retcode = None _num_downloads = None + _screen_file = None def __init__(self, params): """Create a FileDownloader object with the given options.""" @@ -211,6 +222,7 @@ class FileDownloader(object): self._pps = [] self._download_retcode = 0 self._num_downloads = 0 + self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params @staticmethod @@ -223,6 +235,13 @@ class FileDownloader(object): if not os.path.exists(dir): os.mkdir(dir) + @staticmethod + def temp_name(filename): + """Returns a temporary filename for the given filename.""" + if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)): + return filename + return filename + u'.part' + @staticmethod def format_bytes(bytes): if bytes is None: @@ -297,12 +316,13 @@ class FileDownloader(object): self._pps.append(pp) pp.set_downloader(self) - def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False): + def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False): """Print message to stdout if not in quiet mode.""" try: if not self.params.get('quiet', False): - print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), - sys.stdout.flush() + terminator = [u'\n', u''][skip_eol] + print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()), + self._screen_file.flush() except (UnicodeEncodeError), err: if not ignore_encoding_errors: raise @@ -340,43 +360,51 @@ class FileDownloader(object): speed = float(byte_counter) / elapsed if speed > rate_limit: time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + + def try_rename(self, old_filename, new_filename): + try: + if old_filename == new_filename: + return + os.rename(old_filename, new_filename) + except (IOError, OSError), err: + self.trouble(u'ERROR: unable to rename file') def report_destination(self, filename): 
"""Report destination filename.""" - self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) + self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" if self.params.get('noprogress', False): return - self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % + self.to_screen(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" - self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + self.to_screen(u'[download] Resuming download at byte %s' % resume_len) def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" - self.to_stdout(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) + self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' 
% (count, retries)) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_stdout(u'[download] %s has already been downloaded' % file_name) + self.to_screen(u'[download] %s has already been downloaded' % file_name) except (UnicodeEncodeError), err: - self.to_stdout(u'[download] The file has already been downloaded') + self.to_screen(u'[download] The file has already been downloaded') def report_unable_to_resume(self): """Report it was impossible to resume download.""" - self.to_stdout(u'[download] Unable to resume') + self.to_screen(u'[download] Unable to resume') def report_finish(self): """Report download finished.""" if self.params.get('noprogress', False): - self.to_stdout(u'[download] Download completed') + self.to_screen(u'[download] Download completed') else: - self.to_stdout(u'') + self.to_screen(u'') def increment_downloads(self): """Increment the ordinal that assigns a number to each file.""" @@ -401,7 +429,7 @@ class FileDownloader(object): try: template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) - template_dict['ord'] = unicode('%05d' % self._num_downloads) + template_dict['autonumber'] = unicode('%05d' % self._num_downloads) filename = self.params['outtmpl'] % template_dict except (ValueError, KeyError), err: self.trouble(u'ERROR: invalid system charset or erroneous output template') @@ -471,6 +499,7 @@ class FileDownloader(object): def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) + tmpfilename = self.temp_name(filename) # Check for rtmpdump first try: @@ -482,36 +511,43 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. 
- basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename] + basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: - prevsize = os.path.getsize(filename) - self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) + prevsize = os.path.getsize(tmpfilename) + self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) time.sleep(5.0) # This seems to be needed retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) - cursize = os.path.getsize(filename) + cursize = os.path.getsize(tmpfilename) if prevsize == cursize and retval == 1: break if retval == 0: - self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) + self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename)) + self.try_rename(tmpfilename, filename) return True else: self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False def _do_download(self, filename, url, player_url): + # Check file already present + if self.params.get('continuedl', False) and os.path.isfile(filename): + self.report_file_already_downloaded(filename) + return True + # Attempt to download using rtmpdump if url.startswith('rtmp'): return self._download_with_rtmpdump(filename, url, player_url) + tmpfilename = self.temp_name(filename) stream = None open_mode = 'wb' basic_request = urllib2.Request(url, None, std_headers) request = urllib2.Request(url, None, std_headers) # Establish possible resume length - if os.path.isfile(filename): - resume_len = os.path.getsize(filename) + if os.path.isfile(tmpfilename): + resume_len = os.path.getsize(tmpfilename) else: resume_len = 0 @@ -553,6 +589,7 @@ class FileDownloader(object): # completely downloaded if the file size differs less than 100 bytes from # the one in the 
hard drive. self.report_file_already_downloaded(filename) + self.try_rename(tmpfilename, filename) return True else: # The length does not match, we start the download over @@ -586,7 +623,7 @@ class FileDownloader(object): # Open file just in time if stream is None: try: - (stream, filename) = sanitize_open(filename, open_mode) + (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) self.report_destination(filename) except (OSError, IOError), err: self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) @@ -607,9 +644,11 @@ class FileDownloader(object): # Apply rate limit self.slow_down(start, byte_counter) + stream.close() self.report_finish() if data_len is not None and str(byte_counter) != data_len: raise ContentTooShortError(byte_counter, long(data_len)) + self.try_rename(tmpfilename, filename) return True class InfoExtractor(object): @@ -686,9 +725,9 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' - _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' + _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality @@ -710,35 +749,35 @@ class YoutubeIE(InfoExtractor): def report_lang(self): """Report attempt to set language.""" - self._downloader.to_stdout(u'[youtube] Setting language') + self._downloader.to_screen(u'[youtube] Setting language') def report_login(self): """Report attempt to log in.""" - 
self._downloader.to_stdout(u'[youtube] Logging in') + self._downloader.to_screen(u'[youtube] Logging in') def report_age_confirmation(self): """Report attempt to confirm age.""" - self._downloader.to_stdout(u'[youtube] Confirming age') + self._downloader.to_screen(u'[youtube] Confirming age') def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" - self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id) def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" - self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) + self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" - self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) + self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) def report_unavailable_format(self, video_id, format): """Report extracted video URL.""" - self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) + self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format)) def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" - self._downloader.to_stdout(u'[youtube] RTMP download detected') + self._downloader.to_screen(u'[youtube] RTMP download detected') def _real_initialize(self): if self._downloader is None: @@ -819,7 +858,7 @@ class YoutubeIE(InfoExtractor): # Get video webpage self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % 
video_id, None, std_headers) try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -827,9 +866,9 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: - player_url = mobj.group(1) + player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) else: player_url = None @@ -882,6 +921,18 @@ class YoutubeIE(InfoExtractor): else: # don't panic if we can't find it video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + # upload date + upload_date = u'NA' + mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) + if mobj is not None: + upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) + format_expressions = ['%d %B %Y', '%B %d %Y'] + for expression in format_expressions: + try: + upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') + except: + pass + # description video_description = 'No description available.' 
if self._downloader.params.get('forcedescription', False): @@ -893,7 +944,7 @@ class YoutubeIE(InfoExtractor): video_token = urllib.unquote_plus(video_info['token'][0]) # Decide which formats to download - requested_format = self._downloader.params.get('format', None) + req_format = self._downloader.params.get('format', None) get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) if 'fmt_url_map' in video_info: @@ -907,12 +958,15 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - if requested_format is None: - video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality - elif requested_format == '-1': - video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats + if req_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif req_format == '-1': + video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: - video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + if req_format in url_map: + video_url_list = [(req_format, url_map[req_format])] # Specific format + else: + video_url_list = [(req_format, get_video_template % req_format)] # Specific format elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() @@ -936,6 +990,7 @@ class YoutubeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), + 'upload_date': upload_date, 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -966,19 +1021,19 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): """Report disclaimer retrieval.""" - self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer') + 
self._downloader.to_screen(u'[metacafe] Retrieving disclaimer') def report_age_confirmation(self): """Report attempt to confirm age.""" - self._downloader.to_stdout(u'[metacafe] Confirming age') + self._downloader.to_screen(u'[metacafe] Confirming age') def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id) def _real_initialize(self): # Retrieve disclaimer @@ -1082,6 +1137,7 @@ class MetacafeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1106,11 +1162,11 @@ class DailymotionIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1170,6 +1226,7 @@ class DailymotionIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1193,11 +1250,11 @@ class GoogleIE(InfoExtractor): def 
report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1279,6 +1336,7 @@ class GoogleIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1303,11 +1361,11 @@ class PhotobucketIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1360,6 +1418,7 @@ class PhotobucketIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1387,11 +1446,11 @@ class YahooIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - 
self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1514,6 +1573,7 @@ class YahooIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1539,12 +1599,12 @@ class GenericIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.') - self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.') + self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1567,6 +1627,7 @@ class GenericIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + self.report_extraction(video_id) # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: @@ -1616,6 +1677,7 @@ class GenericIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1646,7 +1708,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) - self._downloader.to_stdout(u'[youtube] query 
"%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -1737,7 +1799,7 @@ class GoogleSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) - self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._google_ie.initialize() @@ -1828,7 +1890,7 @@ class YahooSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) - self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._yahoo_ie.initialize() @@ -1918,7 +1980,7 @@ class YoutubePlaylistIE(InfoExtractor): def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" - self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -1955,11 +2017,10 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 - playliststart = self._downloader.params.get('playliststart', 1) - playliststart -= 1 #our arrays are zero-based but the playlist is 1-based - if playliststart > 0: - video_ids = video_ids[playliststart:] - + playliststart = self._downloader.params.get('playliststart', 1) - 1 + playlistend = self._downloader.params.get('playlistend', -1) 
+ video_ids = video_ids[playliststart:playlistend] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1982,7 +2043,7 @@ class YoutubeUserIE(InfoExtractor): def report_download_page(self, username): """Report attempt to download user page.""" - self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) + self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2015,15 +2076,93 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - playliststart = self._downloader.params.get('playliststart', 1) - playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based - if playliststart > 0: - video_ids = video_ids[playliststart:] + playliststart = self._downloader.params.get('playliststart', 1) - 1 + playlistend = self._downloader.params.get('playlistend', -1) + video_ids = video_ids[playliststart:playlistend] for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return +class DepositFilesIE(InfoExtractor): + """Information extractor for depositfiles.com""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DepositFilesIE._VALID_URL, url) is not None) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # At this point we have a new file + self._downloader.increment_downloads() + + file_id = 
url.split('/')[-1] + # Rebuild url in english locale + url = 'http://depositfiles.com/en/files/' + file_id + + # Retrieve file webpage with 'Free download' button pressed + free_download_indication = { 'gateway_result' : '1' } + request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) + try: + self.report_download_webpage(file_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) + return + + # Search for the real file URL + mobj = re.search(r'
                (Attention.*?)', webpage, re.DOTALL) + if (mobj is not None) and (mobj.group(1) is not None): + restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() + self._downloader.trouble(u'ERROR: %s' % restriction_message) + else: + self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) + return + + file_url = mobj.group(1) + file_extension = os.path.splitext(file_url)[1][1:] + + # Search for file title + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + file_title = mobj.group(1).decode('utf-8') + + try: + # Process file information + self._downloader.process_info({ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': u'NA', + 'upload_date': u'NA', + 'title': file_title, + 'stitle': file_title, + 'ext': file_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download file') + class PostProcessor(object): """Post Processor class. 
@@ -2083,25 +2222,20 @@ if __name__ == '__main__': if not os.access (filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) - downloader.to_stdout('Updating to latest stable version...') - latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' + downloader.to_screen('Updating to latest stable version...') + latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version newcontent = urllib.urlopen(prog_url).read() stream = open(filename, 'w') stream.write(newcontent) stream.close() - downloader.to_stdout('Updated to version %s' % latest_version) - - # General configuration - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) - urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + downloader.to_screen('Updated to version %s' % latest_version) # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.10.03', + version='2010.12.09', conflict_handler='resolve', ) @@ -2119,6 +2253,8 @@ if __name__ == '__main__': dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) parser.add_option('--playlist-start', dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + parser.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2164,6 +2300,8 @@ if __name__ == '__main__': action='store_true', dest='usetitle', help='use title in file name', default=False) 
filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', @@ -2172,10 +2310,29 @@ if __name__ == '__main__': action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') parser.add_option_group(filesystem) (opts, args) = parser.parse_args() + # Open appropriate CookieJar + if opts.cookiefile is None: + jar = cookielib.CookieJar() + else: + try: + jar = cookielib.MozillaCookieJar(opts.cookiefile) + if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + jar.load() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to open cookie file') + + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) + urllib2.install_opener(urllib2.build_opener(cookie_processor)) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -2186,7 +2343,7 @@ if __name__ == '__main__': batchfd = open(opts.batchfile, 'r') batchurls = batchfd.readlines() batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0] + batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] except IOError: sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args @@ 
-2198,8 +2355,8 @@ if __name__ == '__main__': parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): - parser.error(u'using output template conflicts with using title or literal title') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): + parser.error(u'using output template conflicts with using title, literal title or auto number') if opts.usetitle and opts.useliteral: parser.error(u'using title conflicts with using literal title') if opts.username is not None and opts.password is None: @@ -2214,11 +2371,18 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') - if opts.playliststart is not None: - try: - opts.playliststart = long(opts.playliststart) - except (TypeError, ValueError), err: - parser.error(u'invalid playlist page specified') + try: + opts.playliststart = long(opts.playliststart) + if opts.playliststart <= 0: + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist start number specified') + try: + opts.playlistend = long(opts.playlistend) + if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist end number specified') # Information extractors youtube_ie = YoutubeIE() @@ -2232,6 +2396,7 @@ if __name__ == '__main__': photobucket_ie = PhotobucketIE() yahoo_ie = YahooIE() yahoo_search_ie = YahooSearchIE(yahoo_ie) + deposit_files_ie = DepositFilesIE() generic_ie = GenericIE() # File downloader @@ -2251,8 +2416,11 @@ if __name__ == '__main__': or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and 
u'%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, @@ -2261,6 +2429,8 @@ if __name__ == '__main__': 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'logtostderr': opts.outtmpl == '-', }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) @@ -2273,6 +2443,7 @@ if __name__ == '__main__': fd.add_info_extractor(photobucket_ie) fd.add_info_extractor(yahoo_ie) fd.add_info_extractor(yahoo_search_ie) + fd.add_info_extractor(deposit_files_ie) # This must come last since it's the # fallback if none of the others work @@ -2289,6 +2460,14 @@ if __name__ == '__main__': else: sys.exit() retcode = fd.download(all_urls) + + # Dump cookie jar if requested + if opts.cookiefile is not None: + try: + jar.save() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to save cookie jar') + sys.exit(retcode) except DownloadError: diff --git a/youtube-dl.spec b/youtube-dl.spec index 9c3a358..41a986e 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.10.24 +Version: 2010.12.09 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %changelog +* Sun Dec 12 2010 Till Maas - 2010.12.09-1 +- Update to latest release to adjust with youtube changes + * Sat Nov 06 2010 Till Maas - 2010.10.24-1 - Update to 
latest release - Adjust to new upstream location at github instead of bitbucket From af7dea07c6b5903e254dfb0fcbef21d20efb974b Mon Sep 17 00:00:00 2001 From: Till Maas Date: Mon, 31 Jan 2011 23:00:15 +0100 Subject: [PATCH 038/279] update to new release --- youtube-dl | 475 +++++++++++++++++++++++++++++++++--------------- youtube-dl.spec | 7 +- 2 files changed, 337 insertions(+), 145 deletions(-) diff --git a/youtube-dl b/youtube-dl index a8e3bd3..a4c8f24 100644 --- a/youtube-dl +++ b/youtube-dl @@ -4,9 +4,14 @@ # Author: Danny Colligan # Author: Benjamin Johnson # Author: Vasyl' Vavrychuk +# Author: Witold Baryluk +# Author: Paweł Paprota # License: Public domain code import cookielib +import ctypes import datetime +import email.utils +import gzip import htmlentitydefs import httplib import locale @@ -17,11 +22,13 @@ import os.path import re import socket import string +import StringIO import subprocess import sys import time import urllib import urllib2 +import zlib # parse_qs was moved from the cgi module to the urlparse module recently. try: @@ -30,9 +37,10 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-us,en;q=0.5', } @@ -56,7 +64,7 @@ def preferredencoding(): def htmlentity_transform(matchobj): """Transforms an HTML entity to a Unicode character. - + This function receives a match object and is intended to be used with the re.sub() function. 
""" @@ -111,9 +119,17 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) +def timeconvert(timestr): + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + class DownloadError(Exception): """Download Error exception. - + This exception may be thrown by FileDownloader objects if they are not configured to continue on errors. They will contain the appropriate error message. @@ -159,6 +175,64 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected +class YoutubeDLHandler(urllib2.HTTPHandler): + """Handler for HTTP requests and responses. + + This class, when installed with an OpenerDirector, automatically adds + the standard headers to every HTTP request and handles gzipped and + deflated responses from web servers. If compression is to be avoided in + a particular request, the original request in the program code only has + to include the HTTP header "Youtubedl-No-Compression", which will be + removed before making the real request. + + Part of this code was copied from: + + http://techknack.net/python-urllib2-handlers/ + + Andrew Rowls, the author of that code, agreed to release it to the + public domain. 
+ """ + + @staticmethod + def deflate(data): + try: + return zlib.decompress(data, -zlib.MAX_WBITS) + except zlib.error: + return zlib.decompress(data) + + @staticmethod + def addinfourl_wrapper(stream, headers, url, code): + if hasattr(urllib2.addinfourl, 'getcode'): + return urllib2.addinfourl(stream, headers, url, code) + ret = urllib2.addinfourl(stream, headers, url) + ret.code = code + return ret + + def http_request(self, req): + for h in std_headers: + if h in req.headers: + del req.headers[h] + req.add_header(h, std_headers[h]) + if 'Youtubedl-no-compression' in req.headers: + if 'Accept-encoding' in req.headers: + del req.headers['Accept-encoding'] + del req.headers['Youtubedl-no-compression'] + return req + + def http_response(self, req, resp): + old_resp = resp + # gzip + if resp.headers.get('Content-encoding', '') == 'gzip': + gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r') + resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + # deflate + if resp.headers.get('Content-encoding', '') == 'deflate': + gz = StringIO.StringIO(self.deflate(resp.read())) + resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + return resp + class FileDownloader(object): """File Downloader class. @@ -194,6 +268,7 @@ class FileDownloader(object): forcetitle: Force printing title. forcethumbnail: Force printing thumbnail URL. forcedescription: Force printing description. + forcefilename: Force printing final filename. simulate: Do not download the video files. format: Video format code. format_limit: Highest quality format to try. @@ -207,6 +282,9 @@ class FileDownloader(object): playliststart: Playlist item to start at. playlistend: Playlist item to end at. logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. 
+ updatetime: Use the Last-modified header to set output file timestamps. """ params = None @@ -224,7 +302,7 @@ class FileDownloader(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - + @staticmethod def pmkdir(filename): """Create directory components in filename. Similar to Unix "mkdir -p".""" @@ -234,14 +312,7 @@ class FileDownloader(object): for dir in aggregate: if not os.path.exists(dir): os.mkdir(dir) - - @staticmethod - def temp_name(filename): - """Returns a temporary filename for the given filename.""" - if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)): - return filename - return filename + u'.part' - + @staticmethod def format_bytes(bytes): if bytes is None: @@ -310,12 +381,12 @@ class FileDownloader(object): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) ie.set_downloader(self) - + def add_post_processor(self, pp): """Add a PostProcessor object to the end of the chain.""" self._pps.append(pp) pp.set_downloader(self) - + def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False): """Print message to stdout if not in quiet mode.""" try: @@ -326,11 +397,22 @@ class FileDownloader(object): except (UnicodeEncodeError), err: if not ignore_encoding_errors: raise - + def to_stderr(self, message): """Print message to stderr.""" print >>sys.stderr, message.encode(preferredencoding()) - + + def to_cons_title(self, message): + """Set console/terminal window title to message.""" + if not self.params.get('consoletitle', False): + return + if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + # c_wchar_p() might not be necessary if `message` is + # already of type unicode() + ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) + elif 'TERM' in os.environ: + sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding())) + def fixed_template(self): """Checks if the 
output template is fixed.""" return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) @@ -360,7 +442,19 @@ class FileDownloader(object): speed = float(byte_counter) / elapsed if speed > rate_limit: time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) - + + def temp_name(self, filename): + """Returns a temporary filename for the given filename.""" + if self.params.get('nopart', False) or filename == u'-' or \ + (os.path.exists(filename) and not os.path.isfile(filename)): + return filename + return filename + u'.part' + + def undo_temp_name(self, filename): + if filename.endswith(u'.part'): + return filename[:-len(u'.part')] + return filename + def try_rename(self, old_filename, new_filename): try: if old_filename == new_filename: @@ -368,50 +462,82 @@ class FileDownloader(object): os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(filename): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return + try: + os.utime(filename,(time.time(), filetime)) + except: + pass def report_destination(self, filename): """Report destination filename.""" self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) - + def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" if self.params.get('noprogress', False): return self.to_screen(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % + (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) def report_resuming_byte(self, resume_len): """Report attempt to resume at given 
byte.""" self.to_screen(u'[download] Resuming download at byte %s' % resume_len) - + def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) - + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: self.to_screen(u'[download] %s has already been downloaded' % file_name) except (UnicodeEncodeError), err: self.to_screen(u'[download] The file has already been downloaded') - + def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen(u'[download] Unable to resume') - + def report_finish(self): """Report download finished.""" if self.params.get('noprogress', False): self.to_screen(u'[download] Download completed') else: self.to_screen(u'') - + def increment_downloads(self): """Increment the ordinal that assigns a number to each file.""" self._num_downloads += 1 + def prepare_filename(self, info_dict): + """Generate the output filename.""" + try: + template_dict = dict(info_dict) + template_dict['epoch'] = unicode(long(time.time())) + template_dict['autonumber'] = unicode('%05d' % self._num_downloads) + filename = self.params['outtmpl'] % template_dict + return filename + except (ValueError, KeyError), err: + self.trouble(u'ERROR: invalid system charset or erroneous output template') + return None + def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" + filename = self.prepare_filename(info_dict) # Do nothing else if in simulate mode if self.params.get('simulate', False): # Forced printings @@ -423,16 +549,12 @@ class FileDownloader(object): print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') if self.params.get('forcedescription', False) and 'description' in info_dict: print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + if 
self.params.get('forcefilename', False) and filename is not None: + print filename.encode(preferredencoding(), 'xmlcharrefreplace') return - - try: - template_dict = dict(info_dict) - template_dict['epoch'] = unicode(long(time.time())) - template_dict['autonumber'] = unicode('%05d' % self._num_downloads) - filename = self.params['outtmpl'] % template_dict - except (ValueError, KeyError), err: - self.trouble(u'ERROR: invalid system charset or erroneous output template') + + if filename is None: return if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists and will be skipped') @@ -496,7 +618,7 @@ class FileDownloader(object): info = pp.run(info) if info is None: break - + def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -531,7 +653,7 @@ class FileDownloader(object): def _do_download(self, filename, url, player_url): # Check file already present - if self.params.get('continuedl', False) and os.path.isfile(filename): + if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) return True @@ -542,8 +664,11 @@ class FileDownloader(object): tmpfilename = self.temp_name(filename) stream = None open_mode = 'wb' - basic_request = urllib2.Request(url, None, std_headers) - request = urllib2.Request(url, None, std_headers) + + # Do not include the Accept-Encoding header + headers = {'Youtubedl-no-compression': 'True'} + basic_request = urllib2.Request(url, None, headers) + request = urllib2.Request(url, None, headers) # Establish possible resume length if os.path.isfile(tmpfilename): @@ -606,8 +731,10 @@ class FileDownloader(object): return False data_len = data.info().get('Content-length', None) + if data_len is not None: + data_len = long(data_len) + resume_len data_len_str = self.format_bytes(data_len) - byte_counter = 0 + byte_counter 
= 0 + resume_len block_size = 1024 start = time.time() while True: @@ -615,15 +742,15 @@ class FileDownloader(object): before = time.time() data_block = data.read(block_size) after = time.time() - data_block_len = len(data_block) - if data_block_len == 0: + if len(data_block) == 0: break - byte_counter += data_block_len + byte_counter += len(data_block) # Open file just in time if stream is None: try: (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) except (OSError, IOError), err: self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) @@ -633,22 +760,27 @@ class FileDownloader(object): except (IOError, OSError), err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False - block_size = self.best_block_size(after - before, data_block_len) + block_size = self.best_block_size(after - before, len(data_block)) # Progress message percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) - speed_str = self.calc_speed(start, time.time(), byte_counter) + eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) self.report_progress(percent_str, data_len_str, speed_str, eta_str) # Apply rate limit - self.slow_down(start, byte_counter) + self.slow_down(start, byte_counter - resume_len) stream.close() self.report_finish() - if data_len is not None and str(byte_counter) != data_len: + if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, long(data_len)) self.try_rename(tmpfilename, filename) + + # Update file modification time + if self.params.get('updatetime', True): + self.try_utime(filename, data.info().get('last-modified', None)) + return True class InfoExtractor(object): @@ -713,7 +845,7 @@ class InfoExtractor(object): def 
set_downloader(self, downloader): """Sets the downloader for this IE.""" self._downloader = downloader - + def _real_initialize(self): """Real initialization process. Redefine in subclasses.""" pass @@ -725,7 +857,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' @@ -754,31 +886,31 @@ class YoutubeIE(InfoExtractor): def report_login(self): """Report attempt to log in.""" self._downloader.to_screen(u'[youtube] Logging in') - + def report_age_confirmation(self): """Report attempt to confirm age.""" self._downloader.to_screen(u'[youtube] Confirming age') - + def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id) - + def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id) - + def report_information_extraction(self, video_id): """Report attempt to extract video information.""" self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) - + def report_unavailable_format(self, video_id, format): """Report extracted video URL.""" self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format)) - + def 
report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') - + def _real_initialize(self): if self._downloader is None: return @@ -804,7 +936,7 @@ class YoutubeIE(InfoExtractor): return # Set language - request = urllib2.Request(self._LANG_URL, None, std_headers) + request = urllib2.Request(self._LANG_URL) try: self.report_lang() urllib2.urlopen(request).read() @@ -824,7 +956,7 @@ class YoutubeIE(InfoExtractor): 'username': username, 'password': password, } - request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) try: self.report_login() login_results = urllib2.urlopen(request).read() @@ -834,13 +966,13 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) return - + # Confirm age age_form = { 'next_url': '/', 'action_confirm': 'Confirm', } - request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) + request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form)) try: self.report_age_confirmation() age_results = urllib2.urlopen(request).read() @@ -858,7 +990,7 @@ class YoutubeIE(InfoExtractor): # Get video webpage self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -877,7 +1009,7 @@ class YoutubeIE(InfoExtractor): for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' % 
(video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) + request = urllib2.Request(video_info_url) try: video_info_webpage = urllib2.urlopen(request).read() video_info = parse_qs(video_info_webpage) @@ -945,7 +1077,6 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) @@ -963,10 +1094,11 @@ class YoutubeIE(InfoExtractor): elif req_format == '-1': video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: - if req_format in url_map: - video_url_list = [(req_format, url_map[req_format])] # Specific format - else: - video_url_list = [(req_format, get_video_template % req_format)] # Specific format + # Specific format + if req_format not in url_map: + self._downloader.trouble(u'ERROR: requested format not available') + return + video_url_list = [(req_format, url_map[req_format])] # Specific format elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() @@ -1000,7 +1132,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') + self._downloader.trouble(u'\nERROR: unable to download video') class MetacafeIE(InfoExtractor): @@ -1026,18 +1158,18 @@ class MetacafeIE(InfoExtractor): def report_age_confirmation(self): """Report attempt to confirm age.""" self._downloader.to_screen(u'[metacafe] Confirming age') - + def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id) - + def report_extraction(self, video_id): """Report information 
extraction.""" self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id) def _real_initialize(self): # Retrieve disclaimer - request = urllib2.Request(self._DISCLAIMER, None, std_headers) + request = urllib2.Request(self._DISCLAIMER) try: self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() @@ -1050,14 +1182,14 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) + request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form)) try: self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) return - + def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) @@ -1093,7 +1225,7 @@ class MetacafeIE(InfoExtractor): if mobj is not None: mediaURL = urllib.unquote(mobj.group(1)) video_extension = mediaURL[-3:] - + # Extract gdaKey if available mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) if mobj is None: @@ -1145,7 +1277,7 @@ class MetacafeIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1163,7 +1295,7 @@ class DailymotionIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) - + def report_extraction(self, video_id): """Report information extraction.""" self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) @@ -1214,7 +1346,7 @@ class DailymotionIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') video_title = 
sanitize_title(video_title) - mobj = re.search(r'(?im)
                .*?(.+?)', webpage) + mobj = re.search(r'(?im)(.+?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -1234,7 +1366,7 @@ class DailymotionIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1344,7 +1476,7 @@ class GoogleIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1426,7 +1558,7 @@ class PhotobucketIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1584,7 +1716,7 @@ class YahooIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1685,7 +1817,7 @@ class GenericIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): @@ -1700,7 +1832,7 @@ class YoutubeSearchIE(InfoExtractor): def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - + @staticmethod def suitable(url): return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) @@ -1712,7 +1844,7 @@ class YoutubeSearchIE(InfoExtractor): def _real_initialize(self): self._youtube_ie.initialize() - + def 
_real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: @@ -1753,7 +1885,7 @@ class YoutubeSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) - request = urllib2.Request(result_url, None, std_headers) + request = urllib2.Request(result_url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1791,7 +1923,7 @@ class GoogleSearchIE(InfoExtractor): def __init__(self, google_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._google_ie = google_ie - + @staticmethod def suitable(url): return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) @@ -1803,7 +1935,7 @@ class GoogleSearchIE(InfoExtractor): def _real_initialize(self): self._google_ie.initialize() - + def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: @@ -1844,7 +1976,7 @@ class GoogleSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) - request = urllib2.Request(result_url, None, std_headers) + request = urllib2.Request(result_url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1882,7 +2014,7 @@ class YahooSearchIE(InfoExtractor): def __init__(self, yahoo_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._yahoo_ie = yahoo_ie - + @staticmethod def suitable(url): return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) @@ -1894,7 +2026,7 @@ class YahooSearchIE(InfoExtractor): def _real_initialize(self): self._yahoo_ie.initialize() - + def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: @@ -1935,7 +2067,7 @@ class YahooSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) result_url = self._TEMPLATE_URL % 
(urllib.quote_plus(query), pagenum) - request = urllib2.Request(result_url, None, std_headers) + request = urllib2.Request(result_url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1964,7 +2096,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -1973,7 +2105,7 @@ class YoutubePlaylistIE(InfoExtractor): def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - + @staticmethod def suitable(url): return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) @@ -1984,7 +2116,7 @@ class YoutubePlaylistIE(InfoExtractor): def _real_initialize(self): self._youtube_ie.initialize() - + def _real_extract(self, url): # Extract playlist id mobj = re.match(self._VALID_URL, url) @@ -1999,7 +2131,7 @@ class YoutubePlaylistIE(InfoExtractor): while True: self.report_download_page(playlist_id, pagenum) - request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum)) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2028,26 +2160,29 @@ class YoutubePlaylistIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' + _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)' 
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' - _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. + _GDATA_PAGE_SIZE = 50 + _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - + @staticmethod def suitable(url): return (re.match(YoutubeUserIE._VALID_URL, url) is not None) - def report_download_page(self, username): + def report_download_page(self, username, start_index): """Report attempt to download user page.""" - self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username)) + self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % + (username, start_index, start_index + self._GDATA_PAGE_SIZE)) def _real_initialize(self): self._youtube_ie.initialize() - + def _real_extract(self, url): # Extract username mobj = re.match(self._VALID_URL, url) @@ -2055,34 +2190,63 @@ class YoutubeUserIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid url: %s' % url) return - # Download user page username = mobj.group(1) + + # Download video ids using YouTube Data API. Result size per + # query is limited (currently to 50 videos) so we need to query + # page by page until there are no video ids - it means we got + # all of them. 
+ video_ids = [] - pagenum = 1 + pagenum = 0 - self.report_download_page(username) - request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) - try: - page = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) - return + while True: + start_index = pagenum * self._GDATA_PAGE_SIZE + 1 + self.report_download_page(username, start_index) + + request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) + + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + + video_ids.extend(ids_in_page) - # Extract video identifiers - ids_in_page = [] + # A little optimization - if current page is not + # "full", ie. does not contain PAGE_SIZE video ids then + # we can assume that this page is the last one - there + # are no more ids on further pages - no need to query + # again. 
- for mobj in re.finditer(self._VIDEO_INDICATOR, page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(mobj.group(1)) - video_ids.extend(ids_in_page) + if len(ids_in_page) < self._GDATA_PAGE_SIZE: + break + + pagenum += 1 + all_ids_count = len(video_ids) playliststart = self._downloader.params.get('playliststart', 1) - 1 playlistend = self._downloader.params.get('playlistend', -1) - video_ids = video_ids[playliststart:playlistend] - for id in video_ids: - self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) - return + if playlistend == -1: + video_ids = video_ids[playliststart:] + else: + video_ids = video_ids[playliststart:playlistend] + + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % + (username, all_ids_count, len(video_ids))) + + for video_id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) + class DepositFilesIE(InfoExtractor): """Information extractor for depositfiles.com""" @@ -2117,7 +2281,7 @@ class DepositFilesIE(InfoExtractor): # Retrieve file webpage with 'Free download' button pressed free_download_indication = { 'gateway_result' : '1' } - request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) + request = urllib2.Request(url, urllib.urlencode(free_download_indication)) try: self.report_download_webpage(file_id) webpage = urllib2.urlopen(request).read() @@ -2188,7 +2352,7 @@ class PostProcessor(object): def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader - + def run(self, information): """Run the PostProcessor. @@ -2208,7 +2372,7 @@ class PostProcessor(object): it was called from. 
""" return information # by default, do nothing - + ### MAIN PROGRAM ### if __name__ == '__main__': try: @@ -2216,26 +2380,32 @@ if __name__ == '__main__': import getpass import optparse - # Function to update the program file with the latest version from bitbucket.org + # Function to update the program file with the latest version from the repository. def update_self(downloader, filename): # Note: downloader only used for options - if not os.access (filename, os.W_OK): + if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) downloader.to_screen('Updating to latest stable version...') - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() - stream = open(filename, 'w') - stream.write(newcontent) - stream.close() + try: + latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + except (IOError, OSError), err: + sys.exit('ERROR: unable to download latest version') + try: + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to overwrite current version') downloader.to_screen('Updated to version %s' % latest_version) # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.12.09', + version='2011.01.30', conflict_handler='resolve', ) @@ -2255,6 +2425,9 @@ if __name__ == '__main__': dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) parser.add_option('--playlist-end', dest='playlistend', metavar='NUMBER', help='playlist video to end at 
(default is last)', default=-1) + parser.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2268,14 +2441,10 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-m', '--mobile-version', - action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - video_format.add_option('-b', '--best-quality', - action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2288,11 +2457,19 @@ if __name__ == '__main__': verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) + action='store_true', dest='getthumbnail', + help='simulate, quiet but print thumbnail URL', default=False) verbosity.add_option('--get-description', - action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', + 
help='simulate, quiet but print output filename', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', + help='display progress in console titlebar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -2301,7 +2478,8 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', @@ -2312,6 +2490,11 @@ if __name__ == '__main__': action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) filesystem.add_option('--cookies', dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) parser.add_option_group(filesystem) (opts, args) = parser.parse_args() @@ -2327,10 +2510,14 @@ if __name__ == '__main__': except (IOError, OSError), err: sys.exit(u'ERROR: unable to open cookie file') + # Dump user agent + if opts.dump_user_agent: + print std_headers['User-Agent'] + sys.exit(0) + # General configuration cookie_processor = urllib2.HTTPCookieProcessor(jar) - 
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) - urllib2.install_opener(urllib2.build_opener(cookie_processor)) + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) # Batch file verification @@ -2349,8 +2536,6 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options - if opts.bestquality: - print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2404,12 +2589,13 @@ if __name__ == '__main__': 'usenetrc': opts.usenetrc, 'username': opts.username, 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'forcethumbnail': opts.getthumbnail, 'forcedescription': opts.getdescription, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), + 'forcefilename': opts.getfilename, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), 'format': opts.format, 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) @@ -2431,6 +2617,9 @@ if __name__ == '__main__': 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'logtostderr': opts.outtmpl == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, }) fd.add_info_extractor(youtube_search_ie) 
fd.add_info_extractor(youtube_pl_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index 41a986e..1cc3616 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,12 +1,12 @@ Name: youtube-dl -Version: 2010.12.09 +Version: 2011.01.30 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia License: Public Domain URL: http://rg3.github.com/youtube-dl/ -Source0: http://github.com/rg3/youtube-dl/raw/%{version}/youtube-dl +Source0: https://github.com/rg3/youtube-dl/raw/%{version}/youtube-dl BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildArch: noarch Requires: python >= 2.4 @@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %changelog +* Mon Jan 31 2011 Till Maas - 2010.01.30-1 +- Update to latest release + * Sun Dec 12 2010 Till Maas - 2010.12.09-1 - Update to latest release to adjust with youtube changes From f9d6d1b83beb2b94c3a92d8837f601e535b2afe7 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Tue, 8 Feb 2011 01:47:12 -0600 Subject: [PATCH 039/279] - Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild --- youtube-dl.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 1cc3616..93be43d 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl Version: 2011.01.30 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %changelog +* Tue Feb 08 2011 Fedora Release Engineering - 2011.01.30-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + * Mon Jan 31 2011 Till Maas - 2010.01.30-1 - Update to latest release From b2f9dc0fb0d3917cf90ac5422d6691d8c860f908 Mon Sep 17 00:00:00 
2001 From: Till Maas Date: Fri, 13 May 2011 23:57:48 +0200 Subject: [PATCH 040/279] Update to latest release --- youtube-dl | 354 ++++++++++++++++++++++++++++++++++++++++++++++-- youtube-dl.spec | 7 +- 2 files changed, 349 insertions(+), 12 deletions(-) diff --git a/youtube-dl b/youtube-dl index a4c8f24..3ac27a8 100644 --- a/youtube-dl +++ b/youtube-dl @@ -6,6 +6,7 @@ # Author: Vasyl' Vavrychuk # Author: Witold Baryluk # Author: Paweł Paprota +# Author: Gergely Imreh # License: Public domain code import cookielib import ctypes @@ -37,7 +38,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', @@ -857,7 +858,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' @@ -1055,10 +1056,10 @@ class YoutubeIE(InfoExtractor): # upload date upload_date = u'NA' - mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) + mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', 
mobj.group(1)).split()) - format_expressions = ['%d %B %Y', '%B %d %Y'] + format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] for expression in format_expressions: try: upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') @@ -1078,7 +1079,7 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info: + if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: @@ -2096,8 +2097,8 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*' - _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None @@ -2124,14 +2125,26 @@ class YoutubePlaylistIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid url: %s' % url) return + # Single video case + if mobj.group(3) is not None: + self._youtube_ie.extract(mobj.group(3)) + return + # Download playlist pages - playlist_id = mobj.group(1) + # prefix is 'p' as default for playlists but there are other types that need extra care + playlist_prefix = mobj.group(1) + if playlist_prefix == 'a': + playlist_access = 'artist' + else: + playlist_prefix = 'p' 
+ playlist_access = 'view_play_list' + playlist_id = mobj.group(2) video_ids = [] pagenum = 1 while True: self.report_download_page(playlist_id, pagenum) - request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum)) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2327,6 +2340,229 @@ class DepositFilesIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') +class FacebookIE(InfoExtractor): + """Information Extractor for Facebook""" + + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P\d+)(?:.*)' + _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' + _NETRC_MACHINE = 'facebook' + _available_formats = ['highqual', 'lowqual'] + _video_extensions = { + 'highqual': 'mp4', + 'lowqual': 'mp4', + } + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(FacebookIE._VALID_URL, url) is not None) + + def _reporter(self, message): + """Add header and report message.""" + self._downloader.to_screen(u'[facebook] %s' % message) + + def report_login(self): + """Report attempt to log in.""" + self._reporter(u'Logging in') + + def report_video_webpage_download(self, video_id): + """Report attempt to download video webpage.""" + self._reporter(u'%s: Downloading video webpage' % video_id) + + def report_information_extraction(self, video_id): + """Report attempt to extract video information.""" + self._reporter(u'%s: Extracting video information' % video_id) + + def _parse_page(self, video_webpage): + """Extract video information from page""" + # General data + data = {'title': r'class="video_title datawrap">(.*?)(.*?)
                ', + 'owner': r'\("video_owner_name", "(.*?)"\)', + 'upload_date': r'data-date="(.*?)"', + 'thumbnail': r'\("thumb_url", "(?P.*?)"\)', + } + video_info = {} + for piece in data.keys(): + mobj = re.search(data[piece], video_webpage) + if mobj is not None: + video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) + + # Video urls + video_urls = {} + for fmt in self._available_formats: + mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage) + if mobj is not None: + # URL is in a Javascript segment inside an escaped Unicode format within + # the generally utf-8 page + video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) + video_info['video_urls'] = video_urls + + return video_info + + def _real_initialize(self): + if self._downloader is None: + return + + useremail = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + useremail = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + useremail = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError), err: + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + return + + if useremail is None: + return + + # Log in + login_form = { + 'email': useremail, + 'pass': password, + 'login': 'Log+In' + } + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) + try: + self.report_login() + login_results = urllib2.urlopen(request).read() + if re.search(r'', login_results) is not None: + self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). 
Check credentials or wait.') + return + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + return + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group('ID') + + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id) + try: + page = urllib2.urlopen(request) + video_webpage = page.read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + # Start extracting information + self.report_information_extraction(video_id) + + # Extract information + video_info = self._parse_page(video_webpage) + + # uploader + if 'owner' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = video_info['owner'] + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = video_info['title'] + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: + video_thumbnail = video_info['thumbnail'] + + # upload date + upload_date = u'NA' + if 'upload_date' in video_info: + upload_time = video_info['upload_date'] + timetuple = email.utils.parsedate_tz(upload_time) + if timetuple is not None: + try: + upload_date = time.strftime('%Y%m%d', timetuple[0:9]) + except: + pass + + # description 
+ video_description = 'No description available.' + if (self._downloader.params.get('forcedescription', False) and + 'description' in video_info): + video_description = video_info['description'] + + url_map = video_info['video_urls'] + if len(url_map.keys()) > 0: + # Decide which formats to download + req_format = self._downloader.params.get('format', None) + format_limit = self._downloader.params.get('format_limit', None) + + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] + else: + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') + return + if req_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif req_format == '-1': + video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats + else: + # Specific format + if req_format not in url_map: + self._downloader.trouble(u'ERROR: requested format not available') + return + video_url_list = [(req_format, url_map[req_format])] # Specific format + + for format_param, video_real_url in video_url_list: + + # At this point we have a new video + self._downloader.increment_downloads() + + # Extension + video_extension = self._video_extensions.get(format_param, 'mp4') + + # Find the video URL in fmt_url_map or conn paramters + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_real_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'upload_date': upload_date, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': 
video_description.decode('utf-8'), + 'player_url': None, + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + class PostProcessor(object): """Post Processor class. @@ -2373,6 +2609,88 @@ class PostProcessor(object): """ return information # by default, do nothing +class FFmpegExtractAudioPP(PostProcessor): + + def __init__(self, downloader=None, preferredcodec=None): + PostProcessor.__init__(self, downloader) + if preferredcodec is None: + preferredcodec = 'best' + self._preferredcodec = preferredcodec + + @staticmethod + def get_audio_codec(path): + try: + cmd = ['ffprobe', '-show_streams', '--', path] + handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): + return None + audio_codec = None + for line in output.split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + + @staticmethod + def run_ffmpeg(path, out_path, codec, more_opts): + try: + cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path] + ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT) + return (ret == 0) + except (IOError, OSError): + return False + + def run(self, information): + path = information['filepath'] + + filecodec = self.get_audio_codec(path) + if filecodec is None: + self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe') + return None + + more_opts = [] + if self._preferredcodec == 'best' or self._preferredcodec == filecodec: + if filecodec == 'aac' or filecodec == 'mp3': + # Lossless if possible + acodec = 'copy' + extension = filecodec + if filecodec == 'aac': + more_opts = ['-f', 'adts'] + else: + # MP3 otherwise. 
+ acodec = 'libmp3lame' + extension = 'mp3' + more_opts = ['-ab', '128k'] + else: + # We convert the audio (lossy) + acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec] + extension = self._preferredcodec + more_opts = ['-ab', '128k'] + if self._preferredcodec == 'aac': + more_opts += ['-f', 'adts'] + + (prefix, ext) = os.path.splitext(path) + new_path = prefix + '.' + extension + self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path) + status = self.run_ffmpeg(path, new_path, acodec, more_opts) + + if not status: + self._downloader.to_stderr(u'WARNING: error running ffmpeg') + return None + + try: + os.remove(path) + except (IOError, OSError): + self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') + return None + + information['filepath'] = new_path + return information + ### MAIN PROGRAM ### if __name__ == '__main__': try: @@ -2405,7 +2723,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.01.30', + version='2011.03.29', conflict_handler='resolve', ) @@ -2497,6 +2815,13 @@ if __name__ == '__main__': help='do not use the Last-modified header to set the file modification time', default=True) parser.add_option_group(filesystem) + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg and ffprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac" or "mp3"; best by default') + parser.add_option_group(postproc) + (opts, args) = parser.parse_args() # Open appropriate CookieJar @@ -2568,6 +2893,9 @@ if __name__ == '__main__': raise ValueError except (TypeError, ValueError), err: parser.error(u'invalid playlist end number specified') + if opts.extractaudio: + if opts.audioformat not in ['best', 
'aac', 'mp3']: + parser.error(u'invalid audio format specified') # Information extractors youtube_ie = YoutubeIE() @@ -2582,6 +2910,7 @@ if __name__ == '__main__': yahoo_ie = YahooIE() yahoo_search_ie = YahooSearchIE(yahoo_ie) deposit_files_ie = DepositFilesIE() + facebook_ie = FacebookIE() generic_ie = GenericIE() # File downloader @@ -2633,11 +2962,16 @@ if __name__ == '__main__': fd.add_info_extractor(yahoo_ie) fd.add_info_extractor(yahoo_search_ie) fd.add_info_extractor(deposit_files_ie) + fd.add_info_extractor(facebook_ie) # This must come last since it's the # fallback if none of the others work fd.add_info_extractor(generic_ie) + # PostProcessors + if opts.extractaudio: + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + # Update version if opts.update_self: update_self(fd, sys.argv[0]) diff --git a/youtube-dl.spec b/youtube-dl.spec index 93be43d..c2f7782 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,6 +1,6 @@ Name: youtube-dl -Version: 2011.01.30 -Release: 2%{?dist} +Version: 2011.03.29 +Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com Group: Applications/Multimedia @@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %changelog +* Fri May 13 2011 Till Maas - 2011.03.29-1 +- Update to latest release + * Tue Feb 08 2011 Fedora Release Engineering - 2011.01.30-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild From a9d5b625451fb36ba2b38b8b640db548625afc05 Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 4 Aug 2011 23:29:39 +0200 Subject: [PATCH 041/279] Update to latest release to adjust to backend changes (Red Hat Bug #728378) --- youtube-dl | 10 ++++++---- youtube-dl.spec | 6 +++++- 2 files changed, 11 insertions(+), 5 deletions(-) mode change 100644 => 100755 youtube-dl diff --git a/youtube-dl b/youtube-dl old mode 100644 new mode 100755 index 3ac27a8..e8b19c8 --- a/youtube-dl +++ 
b/youtube-dl @@ -38,7 +38,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', @@ -1079,8 +1079,10 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: - url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') + url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] @@ -2723,7 +2725,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.03.29', + version='2011.08.04', conflict_handler='resolve', ) diff --git a/youtube-dl.spec b/youtube-dl.spec index c2f7782..9c0aa97 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2011.03.29 +Version: 2011.08.04 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -37,6 +37,10 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} 
%changelog +* Thu Aug 04 2011 Till Maas - 2011.08.04-1 +- Update to latest release to adjust to backend changes (Red Hat Bug + #728378) + * Fri May 13 2011 Till Maas - 2011.03.29-1 - Update to latest release From c80d53a6078b566648ea297e4767186a11326fae Mon Sep 17 00:00:00 2001 From: Till Maas Date: Sat, 22 Oct 2011 12:51:19 +0200 Subject: [PATCH 042/279] Update to latest release --- youtube-dl | 2098 +++++++++++++++++++++++++++++++++++------------ youtube-dl.spec | 5 +- 2 files changed, 1579 insertions(+), 524 deletions(-) diff --git a/youtube-dl b/youtube-dl index e8b19c8..3a37fae 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,19 +1,29 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Author: Ricardo Garcia Gonzalez -# Author: Danny Colligan -# Author: Benjamin Johnson -# Author: Vasyl' Vavrychuk -# Author: Witold Baryluk -# Author: Paweł Paprota -# Author: Gergely Imreh -# License: Public domain code + +__author__ = ( + 'Ricardo Garcia Gonzalez', + 'Danny Colligan', + 'Benjamin Johnson', + 'Vasyl\' Vavrychuk', + 'Witold Baryluk', + 'Paweł Paprota', + 'Gergely Imreh', + 'Rogério Brito', + 'Philipp Hagemeister', + 'Sören Schulze', + ) + +__license__ = 'Public Domain' +__version__ = '2011.10.19' + +UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' + import cookielib -import ctypes import datetime -import email.utils import gzip import htmlentitydefs +import HTMLParser import httplib import locale import math @@ -23,20 +33,42 @@ import os.path import re import socket import string -import StringIO import subprocess import sys import time import urllib import urllib2 +import warnings import zlib +if os.name == 'nt': + import ctypes + +try: + import email.utils +except ImportError: # Python 2.4 + import email.Utils +try: + import cStringIO as StringIO +except ImportError: + import StringIO + # parse_qs was moved from the cgi module to the urlparse module recently. 
try: from urlparse import parse_qs except ImportError: from cgi import parse_qs +try: + import lxml.etree +except ImportError: + pass # Handled below + +try: + import xml.etree.ElementTree +except ImportError: # Python<2.5: Not officially supported, but let it slip + warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.') + std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', @@ -47,6 +79,119 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +try: + import json +except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): + import re + class json(object): + @staticmethod + def loads(s): + s = s.decode('UTF-8') + def raiseError(msg, i): + raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:])) + def skipSpace(i, expectMore=True): + while i < len(s) and s[i] in ' \t\r\n': + i += 1 + if expectMore: + if i >= len(s): + raiseError('Premature end', i) + return i + def decodeEscape(match): + esc = match.group(1) + _STATIC = { + '"': '"', + '\\': '\\', + '/': '/', + 'b': unichr(0x8), + 'f': unichr(0xc), + 'n': '\n', + 'r': '\r', + 't': '\t', + } + if esc in _STATIC: + return _STATIC[esc] + if esc[0] == 'u': + if len(esc) == 1+4: + return unichr(int(esc[1:5], 16)) + if len(esc) == 5+6 and esc[5:7] == '\\u': + hi = int(esc[1:5], 16) + low = int(esc[7:11], 16) + return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000) + raise ValueError('Unknown escape ' + str(esc)) + def parseString(i): + i += 1 + e = i + while True: + e = s.index('"', e) + bslashes = 0 + while s[e-bslashes-1] == '\\': + bslashes += 1 + if bslashes % 2 == 1: + e += 1 + continue + break + rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') + stri = rexp.sub(decodeEscape, s[i:e]) 
+ return (e+1,stri) + def parseObj(i): + i += 1 + res = {} + i = skipSpace(i) + if s[i] == '}': # Empty dictionary + return (i+1,res) + while True: + if s[i] != '"': + raiseError('Expected a string object key', i) + i,key = parseString(i) + i = skipSpace(i) + if i >= len(s) or s[i] != ':': + raiseError('Expected a colon', i) + i,val = parse(i+1) + res[key] = val + i = skipSpace(i) + if s[i] == '}': + return (i+1, res) + if s[i] != ',': + raiseError('Expected comma or closing curly brace', i) + i = skipSpace(i+1) + def parseArray(i): + res = [] + i = skipSpace(i+1) + if s[i] == ']': # Empty array + return (i+1,res) + while True: + i,val = parse(i) + res.append(val) + i = skipSpace(i) # Raise exception if premature end + if s[i] == ']': + return (i+1, res) + if s[i] != ',': + raiseError('Expected a comma or closing bracket', i) + i = skipSpace(i+1) + def parseDiscrete(i): + for k,v in {'true': True, 'false': False, 'null': None}.items(): + if s.startswith(k, i): + return (i+len(k), v) + raiseError('Not a boolean (or null)', i) + def parseNumber(i): + mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:]) + if mobj is None: + raiseError('Not a number', i) + nums = mobj.group(1) + if '.' in nums or 'e' in nums or 'E' in nums: + return (i+len(nums), float(nums)) + return (i+len(nums), int(nums)) + CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete} + def parse(i): + i = skipSpace(i) + i,res = CHARMAP.get(s[i], parseNumber)(i) + i = skipSpace(i, False) + return (i,res) + i,res = parse(0) + if i < len(s): + raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')') + return res + def preferredencoding(): """Get preferred encoding. @@ -63,6 +208,7 @@ def preferredencoding(): yield pref return yield_preferredencoding().next() + def htmlentity_transform(matchobj): """Transforms an HTML entity to a Unicode character. 
@@ -89,11 +235,13 @@ def htmlentity_transform(matchobj): # Unknown entity in name, return its literal representation return (u'&%s;' % entity) + def sanitize_title(utitle): """Sanitizes a video title so it could be used as part of a filename.""" utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) return utitle.replace(unicode(os.sep), u'%') + def sanitize_open(filename, open_mode): """Try to open the given filename, and slightly tweak it if this fails. @@ -120,13 +268,15 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) + def timeconvert(timestr): - """Convert RFC 2822 defined time string into system timestamp""" - timestamp = None - timetuple = email.utils.parsedate_tz(timestr) - if timetuple is not None: - timestamp = email.utils.mktime_tz(timetuple) - return timestamp + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + class DownloadError(Exception): """Download Error exception. @@ -137,6 +287,7 @@ class DownloadError(Exception): """ pass + class SameFileError(Exception): """Same File exception. @@ -145,6 +296,7 @@ class SameFileError(Exception): """ pass + class PostProcessingError(Exception): """Post Processing exception. @@ -153,6 +305,7 @@ class PostProcessingError(Exception): """ pass + class UnavailableVideoError(Exception): """Unavailable Format exception. @@ -161,6 +314,7 @@ class UnavailableVideoError(Exception): """ pass + class ContentTooShortError(Exception): """Content Too Short exception. @@ -176,6 +330,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(urllib2.HTTPHandler): """Handler for HTTP requests and responses. 
@@ -185,11 +340,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler): a particular request, the original request in the program code only has to include the HTTP header "Youtubedl-No-Compression", which will be removed before making the real request. - + Part of this code was copied from: - http://techknack.net/python-urllib2-handlers/ - + http://techknack.net/python-urllib2-handlers/ + Andrew Rowls, the author of that code, agreed to release it to the public domain. """ @@ -200,7 +355,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): return zlib.decompress(data, -zlib.MAX_WBITS) except zlib.error: return zlib.decompress(data) - + @staticmethod def addinfourl_wrapper(stream, headers, url, code): if hasattr(urllib2.addinfourl, 'getcode'): @@ -208,7 +363,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): ret = urllib2.addinfourl(stream, headers, url) ret.code = code return ret - + def http_request(self, req): for h in std_headers: if h in req.headers: @@ -234,6 +389,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): resp.msg = old_resp.msg return resp + class FileDownloader(object): """File Downloader class. @@ -282,10 +438,14 @@ class FileDownloader(object): noprogress: Do not print the progress bar. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. 
+ writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file """ params = None @@ -304,16 +464,6 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - @staticmethod - def pmkdir(filename): - """Create directory components in filename. Similar to Unix "mkdir -p".""" - components = filename.split(os.sep) - aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] - aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator - for dir in aggregate: - if not os.path.exists(dir): - os.mkdir(dir) - @staticmethod def format_bytes(bytes): if bytes is None: @@ -325,7 +475,7 @@ class FileDownloader(object): else: exponent = long(math.log(bytes, 1024.0)) suffix = 'bkMGTPEZY'[exponent] - converted = float(bytes) / float(1024**exponent) + converted = float(bytes) / float(1024 ** exponent) return '%.2f%s' % (converted, suffix) @staticmethod @@ -463,7 +613,7 @@ class FileDownloader(object): os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') - + def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: @@ -475,11 +625,20 @@ class FileDownloader(object): return filetime = timeconvert(timestr) if filetime is None: - return + return filetime try: - os.utime(filename,(time.time(), filetime)) + os.utime(filename, (time.time(), filetime)) except: pass + return filetime + + def report_writedescription(self, descfn): + """ Report that the description file is being written """ + self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True) + + def report_writeinfojson(self, infofn): + """ Report that the metadata file has been written """ + self.to_screen(u'[info] Video description metadata as JSON to: %s' % 
infofn, ignore_encoding_errors=True) def report_destination(self, filename): """Report destination filename.""" @@ -539,51 +698,100 @@ class FileDownloader(object): def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" filename = self.prepare_filename(info_dict) + + # Forced printings + if self.params.get('forcetitle', False): + print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forceurl', False): + print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcedescription', False) and 'description' in info_dict: + print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcefilename', False) and filename is not None: + print filename.encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forceformat', False): + print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') + # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Forced printings - if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') - return if filename is None: return + + 
matchtitle=self.params.get('matchtitle',False) + rejecttitle=self.params.get('rejecttitle',False) + title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): + self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle)) + return + if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): + self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle)) + return + if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists and will be skipped') return try: - self.pmkdir(filename) + dn = os.path.dirname(filename) + if dn != '' and not os.path.exists(dn): + os.makedirs(dn) except (OSError, IOError), err: - self.trouble(u'ERROR: unable to create directories: %s' % str(err)) + self.trouble(u'ERROR: unable to create directory ' + unicode(err)) return - try: - success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) - except (OSError, IOError), err: - raise UnavailableVideoError - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.trouble(u'ERROR: unable to download video data: %s' % str(err)) - return - except (ContentTooShortError, ), err: - self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) - return + if self.params.get('writedescription', False): + try: + descfn = filename + '.description' + self.report_writedescription(descfn) + descfile = open(descfn, 'wb') + try: + descfile.write(info_dict['description'].encode('utf-8')) + finally: + descfile.close() + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write description file ' + descfn) + return + + if self.params.get('writeinfojson', False): + infofn = filename + '.info.json' + self.report_writeinfojson(infofn) + try: + json.dump + except (NameError,AttributeError): + 
self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') + return + try: + infof = open(infofn, 'wb') + try: + json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',)) + json.dump(json_info_dict, infof) + finally: + infof.close() + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) + return - if success: + if not self.params.get('skip_download', False): try: - self.post_process(filename, info_dict) - except (PostProcessingError), err: - self.trouble(u'ERROR: postprocessing: %s' % str(err)) + success = self._do_download(filename, info_dict) + except (OSError, IOError), err: + raise UnavailableVideoError + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.trouble(u'ERROR: unable to download video data: %s' % str(err)) return + except (ContentTooShortError, ), err: + self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return + + if success: + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + self.trouble(u'ERROR: postprocessing: %s' % str(err)) + return def download(self, url_list): """Download a given list of URLs.""" @@ -644,6 +852,11 @@ class FileDownloader(object): cursize = os.path.getsize(tmpfilename) if prevsize == cursize and retval == 1: break + # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == 2 and cursize > 1024: + self.to_screen(u'\r[rtmpdump] Could not download the whole video. 
This can happen for some advertisements.') + retval = 0 + break if retval == 0: self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename)) self.try_rename(tmpfilename, filename) @@ -652,7 +865,10 @@ class FileDownloader(object): self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False - def _do_download(self, filename, url, player_url): + def _do_download(self, filename, info_dict): + url = info_dict['url'] + player_url = info_dict.get('player_url', None) + # Check file already present if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) @@ -664,7 +880,6 @@ class FileDownloader(object): tmpfilename = self.temp_name(filename) stream = None - open_mode = 'wb' # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} @@ -677,17 +892,22 @@ class FileDownloader(object): else: resume_len = 0 - # Request parameters in case of being able to resume - if self.params.get('continuedl', False) and resume_len != 0: - self.report_resuming_byte(resume_len) - request.add_header('Range','bytes=%d-' % resume_len) - open_mode = 'ab' + open_mode = 'wb' + if resume_len != 0: + if self.params.get('continuedl', False): + self.report_resuming_byte(resume_len) + request.add_header('Range','bytes=%d-' % resume_len) + open_mode = 'ab' + else: + resume_len = 0 count = 0 retries = self.params.get('retries', 0) while count <= retries: # Establish connection try: + if count == 0 and 'urlhandle' in info_dict: + data = info_dict['urlhandle'] data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: @@ -706,7 +926,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < long(content_length) < resume_len + 100)): # The file had already been fully downloaded. 
# Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -751,6 +971,7 @@ class FileDownloader(object): if stream is None: try: (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + assert stream is not None filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) except (OSError, IOError), err: @@ -764,14 +985,20 @@ class FileDownloader(object): block_size = self.best_block_size(after - before, len(data_block)) # Progress message - percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) - self.report_progress(percent_str, data_len_str, speed_str, eta_str) + if data_len is None: + self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') + else: + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) # Apply rate limit self.slow_down(start, byte_counter - resume_len) + if stream is None: + self.trouble(u'\nERROR: Did not get any data blocks') + return False stream.close() self.report_finish() if data_len is not None and byte_counter != data_len: @@ -780,10 +1007,11 @@ class FileDownloader(object): # Update file modification time if self.params.get('updatetime', True): - self.try_utime(filename, data.info().get('last-modified', None)) + info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) return True + class InfoExtractor(object): """Information Extractor class. @@ -814,9 +1042,8 @@ class InfoExtractor(object): description: One-line video description. 
Subclasses of this one should re-define the _real_initialize() and - _real_extract() methods, as well as the suitable() static method. - Probably, they should also be instantiated and added to the main - downloader. + _real_extract() methods and define a _VALID_URL regexp. + Probably, they should also be added to the list of extractors. """ _ready = False @@ -827,10 +1054,9 @@ class InfoExtractor(object): self._ready = False self.set_downloader(downloader) - @staticmethod - def suitable(url): + def suitable(self, url): """Receives a URL and returns True if suitable for this IE.""" - return False + return re.match(self._VALID_URL, url) is not None def initialize(self): """Initializes an instance (authentication, etc).""" @@ -855,16 +1081,17 @@ class InfoExtractor(object): """Real extraction process. Redefine in subclasses.""" pass + class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -873,12 +1100,25 @@ class YoutubeIE(InfoExtractor): '37': 'mp4', '38': 'video', # You actually don't know if this 
will be MOV, AVI or whatever '43': 'webm', + '44': 'webm', '45': 'webm', } - - @staticmethod - def suitable(url): - return (re.match(YoutubeIE._VALID_URL, url) is not None) + _video_dimensions = { + '5': '240x400', + '6': '???', + '13': '???', + '17': '144x176', + '18': '360x640', + '22': '720x1280', + '34': '360x640', + '35': '480x854', + '37': '1080x1920', + '38': '3072x4096', + '43': '360x640', + '44': '480x854', + '45': '720x1280', + } + IE_NAME = u'youtube' def report_lang(self): """Report attempt to set language.""" @@ -912,6 +1152,11 @@ class YoutubeIE(InfoExtractor): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') + def _print_formats(self, formats): + print 'Available formats:' + for x in formats: + print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')) + def _real_initialize(self): if self._downloader is None: return @@ -991,7 +1236,7 @@ class YoutubeIE(InfoExtractor): # Get video webpage self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1009,7 +1254,7 @@ class YoutubeIE(InfoExtractor): self.report_video_info_webpage_download(video_id) for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + % (video_id, el_type)) request = urllib2.Request(video_info_url) try: video_info_webpage = urllib2.urlopen(request).read() @@ -1067,11 +1312,19 @@ class YoutubeIE(InfoExtractor): pass # description - video_description = 'No description available.' 
- if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + try: + lxml.etree + except NameError: + video_description = u'No description available.' + if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): + mobj = re.search(r'', video_webpage) + if mobj is not None: + video_description = mobj.group(1).decode('utf-8') + else: + html_parser = lxml.etree.HTMLParser(encoding='utf-8') + vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) + video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) + # TODO use another parser # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1079,10 +1332,15 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') - url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] - url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) + url_data = [parse_qs(uds) for uds in url_data_strs] + url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) + url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) + format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] @@ -1092,23 +1350,29 @@ 
class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - if req_format is None: + if self._downloader.params.get('listformats', None): + self._print_formats(existing_formats) + return + if req_format is None or req_format == 'best': video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality - elif req_format == '-1': + elif req_format == 'worst': + video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality + elif req_format in ('-1', 'all'): video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: - # Specific format - if req_format not in url_map: + # Specific formats. We pick the first in a slash-delimeted sequence. + # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. + req_formats = req_format.split('/') + video_url_list = None + for rf in req_formats: + if rf in url_map: + video_url_list = [(rf, url_map[rf])] + break + if video_url_list is None: self._downloader.trouble(u'ERROR: requested format not available') return - video_url_list = [(req_format, url_map[req_format])] # Specific format - - elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] - else: - self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') return for format_param, video_real_url in video_url_list: @@ -1118,7 +1382,6 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -1131,7 +1394,7 @@ class YoutubeIE(InfoExtractor): 'ext': 
video_extension.decode('utf-8'), 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'thumbnail': video_thumbnail.decode('utf-8'), - 'description': video_description.decode('utf-8'), + 'description': video_description, 'player_url': player_url, }) except UnavailableVideoError, err: @@ -1145,15 +1408,12 @@ class MetacafeIE(InfoExtractor): _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' _youtube_ie = None + IE_NAME = u'metacafe' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def suitable(url): - return (re.match(MetacafeIE._VALID_URL, url) is not None) - def report_disclaimer(self): """Report disclaimer retrieval.""" self._downloader.to_screen(u'[metacafe] Retrieving disclaimer') @@ -1287,14 +1547,11 @@ class DailymotionIE(InfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + IE_NAME = u'dailymotion' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(DailymotionIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) @@ -1322,6 +1579,7 @@ class DailymotionIE(InfoExtractor): # Retrieve video webpage to extract further information request = urllib2.Request(url) + request.add_header('Cookie', 'family_filter=off') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1331,25 +1589,29 @@ class DailymotionIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + mobj = 
re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = urllib.unquote(mobj.group(1)) + sequence = urllib.unquote(mobj.group(1)) + mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '') # if needed add http://www.dailymotion.com/ if relative URL video_url = mediaURL - # '' - mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + mobj = re.search(r'(?im)Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)(.+?)', webpage) + mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -1371,18 +1633,16 @@ class DailymotionIE(InfoExtractor): except UnavailableVideoError: self._downloader.trouble(u'\nERROR: unable to download video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' + IE_NAME = u'video.google' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(GoogleIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id) @@ -1464,7 +1724,6 @@ class GoogleIE(InfoExtractor): else: # we need something to pass to process_info video_thumbnail = '' - try: # Process video information self._downloader.process_info({ @@ -1486,14 +1745,11 @@ 
class PhotobucketIE(InfoExtractor): """Information extractor for photobucket.com.""" _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + IE_NAME = u'photobucket' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(PhotobucketIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id) @@ -1571,14 +1827,11 @@ class YahooIE(InfoExtractor): # _VPAGE_URL matches only the extractable '/watch/' URLs _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' + IE_NAME = u'video.yahoo' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(YahooIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id) @@ -1664,7 +1917,8 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video description') return video_description = mobj.group(1).decode('utf-8') - if not video_description: video_description = 'No description available.' + if not video_description: + video_description = 'No description available.' 
# Extract video height and width mobj = re.search(r'', webpage) @@ -1685,8 +1939,8 @@ class YahooIE(InfoExtractor): yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + - '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + - '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1715,23 +1969,146 @@ class YahooIE(InfoExtractor): 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'thumbnail': video_thumbnail, - 'description': video_description, 'player_url': None, }) except UnavailableVideoError: self._downloader.trouble(u'\nERROR: unable to download video') +class VimeoIE(InfoExtractor): + """Information extractor for vimeo.com.""" + + # _VALID_URL matches Vimeo URLs + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + IE_NAME = u'vimeo' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url, new_video=True): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + 
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + # At this point we have a new video + self._downloader.increment_downloads() + video_id = mobj.group(1) + + # Retrieve video webpage to extract further information + request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Now we begin extracting as much information as we can from what we + # retrieved. First we extract the information common to all extractors, + # and latter we extract those that are Vimeo specific. + self.report_extraction(video_id) + + # Extract title + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + # Extract uploader + mobj = re.search(r'http://vimeo.com/(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video thumbnail + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + # # Extract video description + # mobj = re.search(r'', webpage) + # if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract video description') + # return + # video_description = mobj.group(1).decode('utf-8') + # if not video_description: video_description = 'No description available.' + video_description = 'Foo.' 
+ + # Vimeo specific: extract request signature + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature') + return + sig = mobj.group(1).decode('utf-8') + + # Vimeo specific: extract video quality information + mobj = re.search(r'(\d+)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video quality information') + return + quality = mobj.group(1).decode('utf-8') + + if int(quality) == 1: + quality = 'hd' + else: + quality = 'sd' + + # Vimeo specific: Extract request signature expiration + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature expiration') + return + sig_exp = mobj.group(1).decode('utf-8') + + video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url, + 'uploader': video_uploader, + 'upload_date': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': u'mp4', + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') + + class GenericIE(InfoExtractor): """Generic last-resort information extractor.""" + _VALID_URL = r'.*' + IE_NAME = u'generic' + def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return True - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.') @@ -1779,11 +2156,11 @@ class GenericIE(InfoExtractor): return video_url = urllib.unquote(mobj.group(1)) - video_id = os.path.basename(video_url) + 
video_id = os.path.basename(video_url) # here's a fun little line of code for you: video_extension = os.path.splitext(video_id)[1][1:] - video_id = os.path.splitext(video_id)[0] + video_id = os.path.splitext(video_id)[0] # it's tempting to parse this further, but you would # have to take into account all the variations like @@ -1825,21 +2202,18 @@ class GenericIE(InfoExtractor): class YoutubeSearchIE(InfoExtractor): """Information Extractor for YouTube search queries.""" - _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' + _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None _max_youtube_results = 1000 + IE_NAME = u'youtube:search' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def suitable(url): - return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -1849,14 +2223,14 @@ class YoutubeSearchIE(InfoExtractor): self._youtube_ie.initialize() def _real_extract(self, query): - mobj = re.match(self._VALID_QUERY, query) + mobj = re.match(self._VALID_URL, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1870,7 +2244,7 @@ class YoutubeSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_youtube_results: - self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + 
self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) n = self._max_youtube_results self._download_n_results(query, n) return @@ -1914,23 +2288,21 @@ class YoutubeSearchIE(InfoExtractor): pagenum = pagenum + 1 + class GoogleSearchIE(InfoExtractor): """Information Extractor for Google Video search queries.""" - _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' + _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' _MORE_PAGES_INDICATOR = r'Next' _google_ie = None _max_google_results = 1000 + IE_NAME = u'video.google:search' def __init__(self, google_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._google_ie = google_ie - @staticmethod - def suitable(url): - return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -1940,14 +2312,14 @@ class GoogleSearchIE(InfoExtractor): self._google_ie.initialize() def _real_extract(self, query): - mobj = re.match(self._VALID_QUERY, query) + mobj = re.match(self._VALID_URL, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1961,7 +2333,7 @@ class GoogleSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_google_results: - self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) + self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % 
(self._max_google_results, n)) n = self._max_google_results self._download_n_results(query, n) return @@ -2005,23 +2377,21 @@ class GoogleSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YahooSearchIE(InfoExtractor): """Information Extractor for Yahoo! Video search queries.""" - _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' + _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"' _MORE_PAGES_INDICATOR = r'\s*Next' _yahoo_ie = None _max_yahoo_results = 1000 + IE_NAME = u'video.yahoo:search' def __init__(self, yahoo_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._yahoo_ie = yahoo_ie - @staticmethod - def suitable(url): - return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -2031,14 +2401,14 @@ class YahooSearchIE(InfoExtractor): self._yahoo_ie.initialize() def _real_extract(self, query): - mobj = re.match(self._VALID_QUERY, query) + mobj = re.match(self._VALID_URL, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -2052,7 +2422,7 @@ class YahooSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_yahoo_results: - self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) + self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) n = self._max_yahoo_results self._download_n_results(query, n) return @@ -2096,23 
+2466,21 @@ class YahooSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None + IE_NAME = u'youtube:playlist' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def suitable(url): - return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) - def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) @@ -2172,28 +2540,26 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return + class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" - _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _youtube_ie = None + IE_NAME = u'youtube:user' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - 
@staticmethod - def suitable(url): - return (re.match(YoutubeUserIE._VALID_URL, url) is not None) - def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % - (username, start_index, start_index + self._GDATA_PAGE_SIZE)) + (username, start_index, start_index + self._GDATA_PAGE_SIZE)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2255,9 +2621,9 @@ class YoutubeUserIE(InfoExtractor): video_ids = video_ids[playliststart:] else: video_ids = video_ids[playliststart:playlistend] - + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) + (username, all_ids_count, len(video_ids))) for video_id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2266,15 +2632,12 @@ class YoutubeUserIE(InfoExtractor): class DepositFilesIE(InfoExtractor): """Information extractor for depositfiles.com""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' + IE_NAME = u'DepositFiles' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(DepositFilesIE._VALID_URL, url) is not None) - def report_download_webpage(self, file_id): """Report webpage download.""" self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) @@ -2342,10 +2705,11 @@ class DepositFilesIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') + class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P\d+)(?:.*)' + _VALID_URL = 
r'^(?:https?://)?(?:\w+\.)?facebook\.com/video/video\.php\?(?:.*?)v=(?P\d+)(?:.*)' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' _available_formats = ['highqual', 'lowqual'] @@ -2353,14 +2717,11 @@ class FacebookIE(InfoExtractor): 'highqual': 'mp4', 'lowqual': 'mp4', } + IE_NAME = u'facebook' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(FacebookIE._VALID_URL, url) is not None) - def _reporter(self, message): """Add header and report message.""" self._downloader.to_screen(u'[facebook] %s' % message) @@ -2508,10 +2869,7 @@ class FacebookIE(InfoExtractor): pass # description - video_description = 'No description available.' - if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): - video_description = video_info['description'] + video_description = video_info.get('description', 'No description available.') url_map = video_info['video_urls'] if len(url_map.keys()) > 0: @@ -2529,6 +2887,8 @@ class FacebookIE(InfoExtractor): return if req_format is None: video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif req_format == 'worst': + video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality elif req_format == '-1': video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: @@ -2546,7 +2906,6 @@ class FacebookIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'mp4') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -2565,36 +2924,593 @@ class FacebookIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download video') -class PostProcessor(object): - """Post Processor class. 
- - PostProcessor objects can be added to downloaders with their - add_post_processor() method. When the downloader has finished a - successful download, it will take its internal chain of PostProcessors - and start calling the run() method on each one of them, first with - an initial argument and then with the returned value of the previous - PostProcessor. - - The chain will be stopped if one of them ever returns None or the end - of the chain is reached. +class BlipTVIE(InfoExtractor): + """Information extractor for blip.tv""" - PostProcessor objects follow a "mutual registration" process similar - to InfoExtractor objects. - """ + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' + _URL_EXT = r'^.*\.([a-z0-9]+)$' + IE_NAME = u'blip.tv' - _downloader = None + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id)) - def __init__(self, downloader=None): - self._downloader = downloader + def report_direct_download(self, title): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title)) - def set_downloader(self, downloader): - """Sets the downloader for this PP.""" - self._downloader = downloader + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res - def run(self, information): - """Run the PostProcessor. + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return - The "information" argument is a dictionary like the ones + if '?' in url: + cchar = '&' + else: + cchar = '?' 
+ json_url = url + cchar + 'skin=json&version=2&no_wrap=1' + request = urllib2.Request(json_url) + self.report_extraction(mobj.group(1)) + info = None + try: + urlh = urllib2.urlopen(request) + if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download + basename = url.split('/')[-1] + title,ext = os.path.splitext(basename) + ext = ext.replace('.', '') + self.report_direct_download(title) + info = { + 'id': title, + 'url': url, + 'title': title, + 'stitle': self._simplify_title(title), + 'ext': ext, + 'urlhandle': urlh + } + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + if info is None: # Regular URL + try: + json_code = urlh.read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err)) + return + + try: + json_data = json.loads(json_code) + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data + + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + video_url = data['media']['url'] + umobj = re.match(self._URL_EXT, video_url) + if umobj is None: + raise ValueError('Can not determine filename extension') + ext = umobj.group(1) + + info = { + 'id': data['item_id'], + 'url': video_url, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'stitle': self._simplify_title(data['title']), + 'ext': ext, + 'format': data['media']['mimeType'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'player_url': data['embedUrl'] + } + except (ValueError,KeyError), err: + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) + return + + self._downloader.increment_downloads() + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + 
self._downloader.trouble(u'\nERROR: unable to download video') + + +class MyVideoIE(InfoExtractor): + """Information Extractor for myvideo.de.""" + + _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + IE_NAME = u'myvideo' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._download.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + simple_title = mobj.group(2).decode('utf-8') + # should actually not be necessary + simple_title = sanitize_title(simple_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) + + # Get video webpage + request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + mobj = re.search(r'', + webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + video_url = mobj.group(1) + ('/%s.flv' % video_id) + + mobj = re.search('([^<]+)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + + video_title = mobj.group(1) + video_title = sanitize_title(video_title) + + try: + self._downloader.process_info({ + 'id': video_id, + 'url': video_url, + 'uploader': u'NA', + 'upload_date': 
u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': u'flv', + 'format': u'NA', + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'\nERROR: Unable to download video') + +class ComedyCentralIE(InfoExtractor): + """Information extractor for The Daily Show and Colbert Report """ + + _VALID_URL = r'^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?Pthedailyshow|colbertnation)\.com/full-episodes/(?P.*)$' + IE_NAME = u'comedycentral' + + def report_extraction(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) + + def report_config_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + + def report_index_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id) + + def report_player_url(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + if mobj.group('shortname'): + if mobj.group('shortname') in ('tds', 'thedailyshow'): + url = 'http://www.thedailyshow.com/full-episodes/' + else: + url = 'http://www.colbertnation.com/full-episodes/' + mobj = re.match(self._VALID_URL, url) + assert mobj is not None + + dlNewest = not mobj.group('episode') + if dlNewest: + epTitle = mobj.group('showname') + else: + epTitle = mobj.group('episode') + + req = urllib2.Request(url) + self.report_extraction(epTitle) + try: + htmlHandle = urllib2.urlopen(req) + html = htmlHandle.read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + 
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + if dlNewest: + url = htmlHandle.geturl() + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) + return + if mobj.group('episode') == '': + self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) + return + epTitle = mobj.group('episode') + + mMovieParams = re.findall('', html) + if len(mMovieParams) == 0: + self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) + return + + playerUrl_raw = mMovieParams[0][0] + self.report_player_url(epTitle) + try: + urlHandle = urllib2.urlopen(playerUrl_raw) + playerUrl = urlHandle.geturl() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) + return + + uri = mMovieParams[0][1] + indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri}) + self.report_index_download(epTitle) + try: + indexXml = urllib2.urlopen(indexUrl).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) + return + + idoc = xml.etree.ElementTree.fromstring(indexXml) + itemEls = idoc.findall('.//item') + for itemEl in itemEls: + mediaId = itemEl.findall('./guid')[0].text + shortMediaId = mediaId.split(':')[-1] + showId = mediaId.split(':')[-2].replace('.com', '') + officialTitle = itemEl.findall('./title')[0].text + officialDate = itemEl.findall('./pubDate')[0].text + + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ + urllib.urlencode({'uri': mediaId})) + configReq = urllib2.Request(configUrl) + self.report_config_download(epTitle) + try: + configXml = urllib2.urlopen(configReq).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + + cdoc = xml.etree.ElementTree.fromstring(configXml) + turls = [] + for rendition in cdoc.findall('.//rendition'): + finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) + turls.append(finfo) + + if len(turls) == 0: + self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') + continue + + # For now, just pick the highest bitrate + format,video_url = turls[-1] + + self._downloader.increment_downloads() + + effTitle = showId + '-' + epTitle + info = { + 'id': shortMediaId, + 'url': video_url, + 'uploader': showId, + 'upload_date': officialDate, + 'title': effTitle, + 'stitle': self._simplify_title(effTitle), + 'ext': 'mp4', + 'format': format, + 'thumbnail': None, + 'description': officialTitle, + 'player_url': playerUrl + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download ' + mediaId) + continue + + +class EscapistIE(InfoExtractor): + """Information extractor for The Escapist """ + + _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?]?.*$' + IE_NAME = u'escapist' + + def report_extraction(self, showName): + self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) + + def report_config_download(self, showName): + self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = 
re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + showName = mobj.group('showname') + videoId = mobj.group('episode') + + self.report_extraction(showName) + try: + webPage = urllib2.urlopen(url).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) + return + + descMatch = re.search('[0-9]+)/(?P.*)$' + IE_NAME = u'collegehumor' + + def report_webpage(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group('videoid') + + self.report_webpage(video_id) + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + m = re.search(r'id="video:(?P[0-9]+)"', webpage) + if m is None: + self._downloader.trouble(u'ERROR: Cannot extract internal video ID') + return + internal_video_id = m.group('internalvideoid') + + info = { + 'id': video_id, + 'internal_id': internal_video_id, + } + + self.report_extraction(video_id) + xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id + try: + metaXml = urllib2.urlopen(xmlUrl).read() + except (urllib2.URLError, httplib.HTTPException, 
socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err)) + return + + mdoc = xml.etree.ElementTree.fromstring(metaXml) + try: + videoNode = mdoc.findall('./video')[0] + info['description'] = videoNode.findall('./description')[0].text + info['title'] = videoNode.findall('./caption')[0].text + info['stitle'] = self._simplify_title(info['title']) + info['url'] = videoNode.findall('./file')[0].text + info['thumbnail'] = videoNode.findall('./thumbnail')[0].text + info['ext'] = info['url'].rpartition('.')[2] + info['format'] = info['ext'] + except IndexError: + self._downloader.trouble(u'\nERROR: Invalid metadata XML file') + return + + self._downloader.increment_downloads() + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + + +class XVideosIE(InfoExtractor): + """Information extractor for xvideos.com""" + + _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' + IE_NAME = u'xvideos' + + def report_webpage(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group(1).decode('utf-8') + + self.report_webpage(video_id) + + request = urllib2.Request(r'http://www.xvideos.com/video' + video_id) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + + + # Extract video URL + mobj = re.search(r'flv_url=(.+?)&', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video url') + return + video_url = urllib2.unquote(mobj.group(1).decode('utf-8')) + + + # Extract title + mobj = re.search(r'(.*?)\s+-\s+XVID', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + + + # Extract video thumbnail + mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + + + self._downloader.increment_downloads() + info = { + 'id': video_id, + 'url': video_url, + 'uploader': None, + 'upload_date': None, + 'title': video_title, + 'stitle': self._simplify_title(video_title), + 'ext': 'flv', + 'format': 'flv', + 'thumbnail': video_thumbnail, + 'description': None, + 'player_url': None, + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download ' + video_id) + + +class PostProcessor(object): + """Post Processor class. + + PostProcessor objects can be added to downloaders with their + add_post_processor() method. When the downloader has finished a + successful download, it will take its internal chain of PostProcessors + and start calling the run() method on each one of them, first with + an initial argument and then with the returned value of the previous + PostProcessor. + + The chain will be stopped if one of them ever returns None or the end + of the chain is reached. 
+ + PostProcessor objects follow a "mutual registration" process similar + to InfoExtractor objects. + """ + + _downloader = None + + def __init__(self, downloader=None): + self._downloader = downloader + + def set_downloader(self, downloader): + """Sets the downloader for this PP.""" + self._downloader = downloader + + def run(self, information): + """Run the PostProcessor. + + The "information" argument is a dictionary like the ones composed by InfoExtractors. The only difference is that this one has an extra field called "filepath" that points to the downloaded file. @@ -2611,13 +3527,16 @@ class PostProcessor(object): """ return information # by default, do nothing + class FFmpegExtractAudioPP(PostProcessor): - def __init__(self, downloader=None, preferredcodec=None): + def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False): PostProcessor.__init__(self, downloader) if preferredcodec is None: preferredcodec = 'best' self._preferredcodec = preferredcodec + self._preferredquality = preferredquality + self._keepvideo = keepvideo @staticmethod def get_audio_codec(path): @@ -2656,24 +3575,32 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec: - if filecodec == 'aac' or filecodec == 'mp3': + if filecodec in ['aac', 'mp3', 'vorbis']: # Lossless if possible acodec = 'copy' extension = filecodec if filecodec == 'aac': more_opts = ['-f', 'adts'] + if filecodec == 'vorbis': + extension = 'ogg' else: # MP3 otherwise. 
acodec = 'libmp3lame' extension = 'mp3' - more_opts = ['-ab', '128k'] + more_opts = [] + if self._preferredquality is not None: + more_opts += ['-ab', self._preferredquality] else: # We convert the audio (lossy) - acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec] + acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec] extension = self._preferredcodec - more_opts = ['-ab', '128k'] + more_opts = [] + if self._preferredquality is not None: + more_opts += ['-ab', self._preferredquality] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] + if self._preferredcodec == 'vorbis': + extension = 'ogg' (prefix, ext) = os.path.splitext(path) new_path = prefix + '.' + extension @@ -2684,320 +3611,445 @@ class FFmpegExtractAudioPP(PostProcessor): self._downloader.to_stderr(u'WARNING: error running ffmpeg') return None - try: - os.remove(path) - except (IOError, OSError): - self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') - return None + # Try to update the date time for extracted audio file. 
+ if information.get('filetime') is not None: + try: + os.utime(new_path, (time.time(), information['filetime'])) + except: + self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') + + if not self._keepvideo: + try: + os.remove(path) + except (IOError, OSError): + self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') + return None information['filepath'] = new_path return information -### MAIN PROGRAM ### -if __name__ == '__main__': + +def updateSelf(downloader, filename): + ''' Update the program file with the latest version from the repository ''' + # Note: downloader only used for options + if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_screen('Updating to latest version...') + try: - # Modules needed only when running the main program - import getpass - import optparse + try: + urlh = urllib.urlopen(UPDATE_URL) + newcontent = urlh.read() + + vmatch = re.search("__version__ = '([^']+)'", newcontent) + if vmatch is not None and vmatch.group(1) == __version__: + downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')') + return + finally: + urlh.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to download latest version') - # Function to update the program file with the latest version from the repository. 
- def update_self(downloader, filename): - # Note: downloader only used for options - if not os.access(filename, os.W_OK): - sys.exit('ERROR: no write permissions on %s' % filename) + try: + outf = open(filename, 'wb') + try: + outf.write(newcontent) + finally: + outf.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updating to latest stable version...') - try: - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() - except (IOError, OSError), err: - sys.exit('ERROR: unable to download latest version') - try: - stream = open(filename, 'w') - stream.write(newcontent) - stream.close() - except (IOError, OSError), err: - sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated to version %s' % latest_version) - - # Parse command line - parser = optparse.OptionParser( - usage='Usage: %prog [options] url...', - version='2011.08.04', - conflict_handler='resolve', - ) - - parser.add_option('-h', '--help', - action='help', help='print this help text and exit') - parser.add_option('-v', '--version', - action='version', help='print program version and exit') - parser.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest stable version') - parser.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') - parser.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) - parser.add_option('--playlist-start', - dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) - parser.add_option('--playlist-end', - dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) - parser.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', - help='display the current browser identification', default=False) - - authentication = optparse.OptionGroup(parser, 'Authentication Options') - authentication.add_option('-u', '--username', - dest='username', metavar='USERNAME', help='account username') - authentication.add_option('-p', '--password', - dest='password', metavar='PASSWORD', help='account password') - authentication.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - parser.add_option_group(authentication) - - video_format = optparse.OptionGroup(parser, 'Video Format Options') - video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='-1') - video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - parser.add_option_group(video_format) - - verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - verbosity.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download video', default=False) - verbosity.add_option('-g', '--get-url', - action='store_true', dest='geturl', 
help='simulate, quiet but print URL', default=False) - verbosity.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', - help='simulate, quiet but print thumbnail URL', default=False) - verbosity.add_option('--get-description', - action='store_true', dest='getdescription', - help='simulate, quiet but print video description', default=False) - verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', - help='simulate, quiet but print output filename', default=False) - verbosity.add_option('--no-progress', - action='store_true', dest='noprogress', help='do not print progress bar', default=False) - verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', - help='display progress in console titlebar', default=False) - parser.add_option_group(verbosity) - - filesystem = optparse.OptionGroup(parser, 'Filesystem Options') - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) - filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', - help='number downloaded files starting from 00000', default=False) - filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template') - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('-w', '--no-overwrites', - action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) - filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) - 
filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') - filesystem.add_option('--no-part', - action='store_true', dest='nopart', help='do not use .part files', default=False) - filesystem.add_option('--no-mtime', - action='store_false', dest='updatetime', - help='do not use the Last-modified header to set the file modification time', default=True) - parser.add_option_group(filesystem) - - postproc = optparse.OptionGroup(parser, 'Post-processing Options') - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg and ffprobe)') - postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac" or "mp3"; best by default') - parser.add_option_group(postproc) - - (opts, args) = parser.parse_args() - - # Open appropriate CookieJar - if opts.cookiefile is None: - jar = cookielib.CookieJar() - else: - try: - jar = cookielib.MozillaCookieJar(opts.cookiefile) - if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): - jar.load() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to open cookie file') + downloader.to_screen('Updated youtube-dl. 
Restart youtube-dl to use the new version.') - # Dump user agent - if opts.dump_user_agent: - print std_headers['User-Agent'] - sys.exit(0) +def parseOpts(): + # Deferred imports + import getpass + import optparse - # General configuration - cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + def _format_option_string(option): + ''' ('-o', '--option') -> -o, --format METAVAR''' + + opts = [] + + if option._short_opts: opts.append(option._short_opts[0]) + if option._long_opts: opts.append(option._long_opts[0]) + if len(opts) > 1: opts.insert(1, ', ') + + if option.takes_value(): opts.append(' %s' % option.metavar) + + return "".join(opts) + + def _find_term_columns(): + columns = os.environ.get('COLUMNS', None) + if columns: + return int(columns) - # Batch file verification - batchurls = [] - if opts.batchfile is not None: - try: - if opts.batchfile == '-': - batchfd = sys.stdin - else: - batchfd = open(opts.batchfile, 'r') - batchurls = batchfd.readlines() - batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] - except IOError: - sys.exit(u'ERROR: batch file could not be read') - all_urls = batchurls + args - - # Conflicting, missing and erroneous options - if opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error(u'using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') - if 
opts.username is not None and opts.password is None: - opts.password = getpass.getpass(u'Type account password and press return:') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - parser.error(u'invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.retries is not None: - try: - opts.retries = long(opts.retries) - except (TypeError, ValueError), err: - parser.error(u'invalid retry count specified') try: - opts.playliststart = long(opts.playliststart) - if opts.playliststart <= 0: - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist start number specified') + sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out,err = sp.communicate() + return int(out.split()[1]) + except: + pass + return None + + max_width = 80 + max_help_position = 80 + + # No need to wrap help messages if we're on a wide console + columns = _find_term_columns() + if columns: max_width = columns + + fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) + fmt.format_option_strings = _format_option_string + + kw = { + 'version' : __version__, + 'formatter' : fmt, + 'usage' : '%prog [options] url [url...]', + 'conflict_handler' : 'resolve', + } + + parser = optparse.OptionParser(**kw) + + # option groups + general = optparse.OptionGroup(parser, 'General Options') + selection = optparse.OptionGroup(parser, 'Video Selection') + authentication = optparse.OptionGroup(parser, 'Authentication Options') + video_format = optparse.OptionGroup(parser, 'Video Format Options') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + + general.add_option('-h', '--help', + action='help', help='print this help text and exit') + general.add_option('-v', 
'--version', + action='version', help='print program version and exit') + general.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest version') + general.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + general.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') + general.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + general.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) + general.add_option('--list-extractors', + action='store_true', dest='list_extractors', + help='List all supported extractors and the URLs they would handle', default=False) + + selection.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + selection.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) + selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') + selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') + + authentication.add_option('-u', '--username', + dest='username', metavar='USERNAME', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PASSWORD', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + + + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FORMAT', help='video 
format code') + video_format.add_option('--all-formats', + action='store_const', dest='format', help='download all available video formats', const='all') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-F', '--list-formats', + action='store_true', dest='listformats', help='list all available formats (currently youtube only)') + + + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) + verbosity.add_option('--skip-download', + action='store_true', dest='skip_download', help='do not download the video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', + help='simulate, quiet but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', + help='simulate, quiet but print output filename', default=False) + verbosity.add_option('--get-format', + action='store_true', dest='getformat', + help='simulate, quiet but print output format', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', + help='display progress in 
console titlebar', default=False) + + + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent') + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--no-continue', + action='store_false', dest='continue_dl', + help='do not resume partially downloaded files (restart from beginning)') + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) + filesystem.add_option('--write-description', + action='store_true', dest='writedescription', + help='write video description to a .description file', default=False) + filesystem.add_option('--write-info-json', + 
action='store_true', dest='writeinfojson', + help='write video metadata to a .info.json file', default=False) + + + postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg and ffprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac", "vorbis" or "mp3"; best by default') + postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', + help='ffmpeg audio bitrate specification, 128k by default') + postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, + help='keeps the video file on disk after the post-processing; the video is erased by default') + + + parser.add_option_group(general) + parser.add_option_group(selection) + parser.add_option_group(filesystem) + parser.add_option_group(verbosity) + parser.add_option_group(video_format) + parser.add_option_group(authentication) + parser.add_option_group(postproc) + + opts, args = parser.parse_args() + + return parser, opts, args + +def gen_extractors(): + """ Return a list of an instance of every supported extractor. + The order does matter; the first extractor matched is the one handling the URL. 
+ """ + youtube_ie = YoutubeIE() + google_ie = GoogleIE() + yahoo_ie = YahooIE() + return [ + YoutubePlaylistIE(youtube_ie), + YoutubeUserIE(youtube_ie), + YoutubeSearchIE(youtube_ie), + youtube_ie, + MetacafeIE(youtube_ie), + DailymotionIE(), + google_ie, + GoogleSearchIE(google_ie), + PhotobucketIE(), + yahoo_ie, + YahooSearchIE(yahoo_ie), + DepositFilesIE(), + FacebookIE(), + BlipTVIE(), + VimeoIE(), + MyVideoIE(), + ComedyCentralIE(), + EscapistIE(), + CollegeHumorIE(), + XVideosIE(), + + GenericIE() + ] + +def main(): + parser, opts, args = parseOpts() + + # Open appropriate CookieJar + if opts.cookiefile is None: + jar = cookielib.CookieJar() + else: try: - opts.playlistend = long(opts.playlistend) - if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist end number specified') - if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3']: - parser.error(u'invalid audio format specified') - - # Information extractors - youtube_ie = YoutubeIE() - metacafe_ie = MetacafeIE(youtube_ie) - dailymotion_ie = DailymotionIE() - youtube_pl_ie = YoutubePlaylistIE(youtube_ie) - youtube_user_ie = YoutubeUserIE(youtube_ie) - youtube_search_ie = YoutubeSearchIE(youtube_ie) - google_ie = GoogleIE() - google_search_ie = GoogleSearchIE(google_ie) - photobucket_ie = PhotobucketIE() - yahoo_ie = YahooIE() - yahoo_search_ie = YahooSearchIE(yahoo_ie) - deposit_files_ie = DepositFilesIE() - facebook_ie = FacebookIE() - generic_ie = GenericIE() - - # File downloader - fd = FileDownloader({ - 'usenetrc': opts.usenetrc, - 'username': opts.username, - 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'forceurl': opts.geturl, - 'forcetitle': opts.gettitle, - 'forcethumbnail': opts.getthumbnail, - 'forcedescription': opts.getdescription, - 
'forcefilename': opts.getfilename, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'format': opts.format, - 'format_limit': opts.format_limit, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') - or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') - or u'%(id)s.%(ext)s'), - 'ignoreerrors': opts.ignoreerrors, - 'ratelimit': opts.ratelimit, - 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, - 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, - 'playliststart': opts.playliststart, - 'playlistend': opts.playlistend, - 'logtostderr': opts.outtmpl == '-', - 'consoletitle': opts.consoletitle, - 'nopart': opts.nopart, - 'updatetime': opts.updatetime, - }) - fd.add_info_extractor(youtube_search_ie) - fd.add_info_extractor(youtube_pl_ie) - fd.add_info_extractor(youtube_user_ie) - fd.add_info_extractor(metacafe_ie) - fd.add_info_extractor(dailymotion_ie) - fd.add_info_extractor(youtube_ie) - fd.add_info_extractor(google_ie) - fd.add_info_extractor(google_search_ie) - fd.add_info_extractor(photobucket_ie) - fd.add_info_extractor(yahoo_ie) - fd.add_info_extractor(yahoo_search_ie) - fd.add_info_extractor(deposit_files_ie) - fd.add_info_extractor(facebook_ie) - - # This must come last since it's the - # fallback if none of the others work - fd.add_info_extractor(generic_ie) - - # PostProcessors - if 
opts.extractaudio: - fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) - - # Update version - if opts.update_self: - update_self(fd, sys.argv[0]) - - # Maybe do nothing - if len(all_urls) < 1: - if not opts.update_self: - parser.error(u'you must provide at least one URL') + jar = cookielib.MozillaCookieJar(opts.cookiefile) + if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + jar.load() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to open cookie file') + + # Dump user agent + if opts.dump_user_agent: + print std_headers['User-Agent'] + sys.exit(0) + + # Batch file verification + batchurls = [] + if opts.batchfile is not None: + try: + if opts.batchfile == '-': + batchfd = sys.stdin else: - sys.exit() - retcode = fd.download(all_urls) + batchfd = open(opts.batchfile, 'r') + batchurls = batchfd.readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) + urllib2.install_opener(opener) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + extractors = gen_extractors() + + if opts.list_extractors: + for ie in extractors: + print(ie.IE_NAME) + matchedUrls = filter(lambda url: ie.suitable(url), all_urls) + all_urls = filter(lambda url: url not in matchedUrls, all_urls) + for mu in matchedUrls: + print(u' ' + mu) + sys.exit(0) + + # Conflicting, missing and erroneous options + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error(u'using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error(u'account username missing') 
+ if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): + parser.error(u'using output template conflicts with using title, literal title or auto number') + if opts.usetitle and opts.useliteral: + parser.error(u'using title conflicts with using literal title') + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass(u'Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error(u'invalid rate limit specified') + opts.ratelimit = numeric_limit + if opts.retries is not None: + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') + try: + opts.playliststart = int(opts.playliststart) + if opts.playliststart <= 0: + raise ValueError(u'Playlist start must be positive') + except (TypeError, ValueError), err: + parser.error(u'invalid playlist start number specified') + try: + opts.playlistend = int(opts.playlistend) + if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): + raise ValueError(u'Playlist end must be greater than playlist start') + except (TypeError, ValueError), err: + parser.error(u'invalid playlist end number specified') + if opts.extractaudio: + if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']: + parser.error(u'invalid audio format specified') + + # File downloader + fd = FileDownloader({ + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'forcefilename': opts.getfilename, + 'forceformat': opts.getformat, + 'simulate': opts.simulate, + 
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), + 'format': opts.format, + 'format_limit': opts.format_limit, + 'listformats': opts.listformats, + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, + 'retries': opts.retries, + 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'logtostderr': opts.outtmpl == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, + 'writedescription': opts.writedescription, + 'writeinfojson': opts.writeinfojson, + 'matchtitle': opts.matchtitle, + 'rejecttitle': opts.rejecttitle, + }) + for extractor in extractors: + fd.add_info_extractor(extractor) + + # PostProcessors + if opts.extractaudio: + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo)) + + # Update version + if opts.update_self: + updateSelf(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + 
sys.exit() + retcode = fd.download(all_urls) - # Dump cookie jar if requested - if opts.cookiefile is not None: - try: - jar.save() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to save cookie jar') + # Dump cookie jar if requested + if opts.cookiefile is not None: + try: + jar.save() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to save cookie jar') + + sys.exit(retcode) - sys.exit(retcode) +if __name__ == '__main__': + try: + main() except DownloadError: sys.exit(1) except SameFileError: sys.exit(u'ERROR: fixed output name but more than one file to download') except KeyboardInterrupt: sys.exit(u'\nERROR: Interrupted by user') + +# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: diff --git a/youtube-dl.spec b/youtube-dl.spec index 9c0aa97..49df96f 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2011.08.04 +Version: 2011.10.19 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %changelog +* Fri Oct 21 2011 Till Maas <opensource@till.name> - 2011.10.19-1 +- Update to latest release + * Thu Aug 04 2011 Till Maas <opensource@till.name> - 2011.08.04-1 - Update to latest release to adjust to backend changes (Red Hat Bug #728378) From d73ead74ec0646ecf0953e9c5905d51b23c82662 Mon Sep 17 00:00:00 2001 From: Till Maas <opensource@till.name> Date: Thu, 8 Dec 2011 23:22:32 +0100 Subject: [PATCH 043/279] Update to new release (fixed Red Hat Bug #758679) --- youtube-dl | 305 ++++++++++++++++++++++++++++++++++-------------- youtube-dl.spec | 5 +- 2 files changed, 223 insertions(+), 87 deletions(-) mode change 100755 => 100644 youtube-dl diff --git a/youtube-dl b/youtube-dl old mode 100755 new mode 100644 index 3a37fae..e6b7be1 --- a/youtube-dl +++ b/youtube-dl @@ -12,10 +12,12 @@ __author__ = ( 'Rogério Brito', 'Philipp Hagemeister', 'Sören 
Schulze', + 'Kevin Ngo', + 'Ori Avtalion', ) __license__ = 'Public Domain' -__version__ = '2011.10.19' +__version__ = '2011.11.23' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' @@ -77,8 +79,6 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } -simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') - try: import json except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): @@ -277,6 +277,9 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp +def _simplify_title(title): + expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE) + return expr.sub(u'_', title).strip(u'_') class DownloadError(Exception): """Download Error exception. @@ -1289,8 +1292,7 @@ class YoutubeIE(InfoExtractor): video_title = sanitize_title(video_title) # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') + simple_title = _simplify_title(video_title) # thumbnail image if 'thumbnail_url' not in video_info: @@ -1560,9 +1562,6 @@ class DailymotionIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) @@ -1651,9 +1650,6 @@ class GoogleIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@ -1697,7 +1693,7 @@ class GoogleIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = 
_simplify_title(video_title) # Extract video description mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage) @@ -1758,9 +1754,6 @@ class PhotobucketIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@ -1799,7 +1792,7 @@ class PhotobucketIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(vide_title) video_uploader = mobj.group(2).decode('utf-8') @@ -1840,9 +1833,6 @@ class YahooIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1896,7 +1886,7 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video title') return video_title = mobj.group(1).decode('utf-8') - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage) if mobj is None: @@ -1993,9 +1983,6 @@ class VimeoIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -2027,7 +2014,7 @@ class VimeoIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video title') return video_title = 
mobj.group(1).decode('utf-8') - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) # Extract uploader mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage) @@ -2118,9 +2105,6 @@ class GenericIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # At this point we have a new video self._downloader.increment_downloads() @@ -2174,7 +2158,7 @@ class GenericIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) # video uploader is domain name mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) @@ -2470,7 +2454,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' @@ -2514,7 +2498,8 @@ class YoutubePlaylistIE(InfoExtractor): while True: self.report_download_page(playlist_id, pagenum) - request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) + url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum) + request = 
urllib2.Request(url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2548,7 +2533,7 @@ class YoutubeUserIE(InfoExtractor): _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' - _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' _youtube_ie = None IE_NAME = u'youtube:user' @@ -2646,9 +2631,6 @@ class DepositFilesIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) - def _real_initialize(self): - return - def _real_extract(self, url): # At this point we have a new file self._downloader.increment_downloads() @@ -2709,11 +2691,12 @@ class DepositFilesIE(InfoExtractor): class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/video/video\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' - _available_formats = ['highqual', 'lowqual'] + _available_formats = ['video', 'highqual', 'lowqual'] _video_extensions = { + 'video': 'mp4', 'highqual': 'mp4', 'lowqual': 'mp4', } @@ -2741,10 +2724,9 @@ class FacebookIE(InfoExtractor): def _parse_page(self, video_webpage): """Extract video information from page""" # General data - data = {'title': r'class="video_title datawrap">(.*?)</', + data = {'title': r'\("video_title", "(.*?)"\)', 'description': r'<div class="datawrap">(.*?)</div>', 'owner': r'\("video_owner_name", "(.*?)"\)', - 'upload_date': r'data-date="(.*?)"', 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)', } video_info = {} @@ -2846,9 +2828,7 @@ class 
FacebookIE(InfoExtractor): video_title = video_title.decode('utf-8') video_title = sanitize_title(video_title) - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') + simple_title = _simplify_title(video_title) # thumbnail image if 'thumbnail' not in video_info: @@ -2939,11 +2919,6 @@ class BlipTVIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -2963,13 +2938,14 @@ class BlipTVIE(InfoExtractor): if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download basename = url.split('/')[-1] title,ext = os.path.splitext(basename) + title = title.decode('UTF-8') ext = ext.replace('.', '') self.report_direct_download(title) info = { 'id': title, 'url': url, 'title': title, - 'stitle': self._simplify_title(title), + 'stitle': _simplify_title(title), 'ext': ext, 'urlhandle': urlh } @@ -3003,7 +2979,7 @@ class BlipTVIE(InfoExtractor): 'uploader': data['display_name'], 'upload_date': upload_date, 'title': data['title'], - 'stitle': self._simplify_title(data['title']), + 'stitle': _simplify_title(data['title']), 'ext': ext, 'format': data['media']['mimeType'], 'thumbnail': data['thumbnailUrl'], @@ -3039,9 +3015,6 @@ class MyVideoIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3049,10 +3022,6 @@ class MyVideoIE(InfoExtractor): return video_id = mobj.group(1) - simple_title = mobj.group(2).decode('utf-8') - # should actually not be 
necessary - simple_title = sanitize_title(simple_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) # Get video webpage request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) @@ -3079,6 +3048,8 @@ class MyVideoIE(InfoExtractor): video_title = mobj.group(1) video_title = sanitize_title(video_title) + simple_title = _simplify_title(video_title) + try: self._downloader.process_info({ 'id': video_id, @@ -3112,11 +3083,6 @@ class ComedyCentralIE(InfoExtractor): def report_player_url(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3125,9 +3091,9 @@ class ComedyCentralIE(InfoExtractor): if mobj.group('shortname'): if mobj.group('shortname') in ('tds', 'thedailyshow'): - url = 'http://www.thedailyshow.com/full-episodes/' + url = u'http://www.thedailyshow.com/full-episodes/' else: - url = 'http://www.colbertnation.com/full-episodes/' + url = u'http://www.colbertnation.com/full-episodes/' mobj = re.match(self._VALID_URL, url) assert mobj is not None @@ -3213,14 +3179,14 @@ class ComedyCentralIE(InfoExtractor): self._downloader.increment_downloads() - effTitle = showId + '-' + epTitle + effTitle = showId + u'-' + epTitle info = { 'id': shortMediaId, 'url': video_url, 'uploader': showId, 'upload_date': officialDate, 'title': effTitle, - 'stitle': self._simplify_title(effTitle), + 'stitle': _simplify_title(effTitle), 'ext': 'mp4', 'format': format, 'thumbnail': None, @@ -3247,11 +3213,6 @@ class EscapistIE(InfoExtractor): def report_config_download(self, showName): self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, 
ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3304,7 +3265,7 @@ class EscapistIE(InfoExtractor): 'uploader': showName, 'upload_date': None, 'title': showName, - 'stitle': self._simplify_title(showName), + 'stitle': _simplify_title(showName), 'ext': 'flv', 'format': 'flv', 'thumbnail': imgUrl, @@ -3332,11 +3293,6 @@ class CollegeHumorIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3378,7 +3334,7 @@ class CollegeHumorIE(InfoExtractor): videoNode = mdoc.findall('./video')[0] info['description'] = videoNode.findall('./description')[0].text info['title'] = videoNode.findall('./caption')[0].text - info['stitle'] = self._simplify_title(info['title']) + info['stitle'] = _simplify_title(info['title']) info['url'] = videoNode.findall('./file')[0].text info['thumbnail'] = videoNode.findall('./thumbnail')[0].text info['ext'] = info['url'].rpartition('.')[2] @@ -3409,11 +3365,6 @@ class XVideosIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3467,7 +3418,7 @@ class XVideosIE(InfoExtractor): 'uploader': None, 'upload_date': None, 'title': video_title, - 'stitle': self._simplify_title(video_title), + 'stitle': _simplify_title(video_title), 'ext': 'flv', 'format': 'flv', 'thumbnail': video_thumbnail, @@ -3481,6 +3432,184 @@ class XVideosIE(InfoExtractor): self._downloader.trouble(u'\nERROR: 
unable to download ' + video_id) +class SoundcloudIE(InfoExtractor): + """Information extractor for soundcloud.com + To access the media, the uid of the song and a stream token + must be extracted from the page source and the script must make + a request to media.soundcloud.com/crossdomain.xml. Then + the media can be grabbed by requesting from an url composed + of the stream token and uid + """ + + _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' + IE_NAME = u'soundcloud' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_webpage(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + # extract uploader (which is in the url) + uploader = mobj.group(1).decode('utf-8') + # extract simple title (uploader + slug of song title) + slug_title = mobj.group(2).decode('utf-8') + simple_title = uploader + '-' + slug_title + + self.report_webpage('%s/%s' % (uploader, slug_title)) + + request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title)) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + self.report_extraction('%s/%s' % (uploader, slug_title)) + + # extract uid and stream token that soundcloud hands out for access + mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage) + if mobj: + video_id = mobj.group(1) + stream_token = 
mobj.group(2) + + # extract unsimplified title + mobj = re.search('"title":"(.*?)",', webpage) + if mobj: + title = mobj.group(1) + + # construct media url (with uid/token) + mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s" + mediaURL = mediaURL % (video_id, stream_token) + + # description + description = u'No description available' + mobj = re.search('track-description-value"><p>(.*?)</p>', webpage) + if mobj: + description = mobj.group(1) + + # upload date + upload_date = None + mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage) + if mobj: + try: + upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') + except Exception, e: + print str(e) + + # for soundcloud, a request to a cross domain is required for cookies + request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) + + try: + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': mediaURL, + 'uploader': uploader.decode('utf-8'), + 'upload_date': upload_date, + 'title': simple_title.decode('utf-8'), + 'stitle': simple_title.decode('utf-8'), + 'ext': u'mp3', + 'format': u'NA', + 'player_url': None, + 'description': description.decode('utf-8') + }) + except UnavailableVideoError: + self._downloader.trouble(u'\nERROR: unable to download video') + + +class InfoQIE(InfoExtractor): + """Information extractor for infoq.com""" + + _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' + IE_NAME = u'infoq' + + def report_webpage(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is 
None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + self.report_webpage(url) + + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + self.report_extraction(url) + + + # Extract video URL + mobj = re.search(r"jsclassref='([^']*)'", webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video url') + return + video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64')) + + + # Extract title + mobj = re.search(r'contentTitle = "(.*?)";', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + + # Extract description + video_description = u'No description available.' + mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage) + if mobj is not None: + video_description = mobj.group(1).decode('utf-8') + + video_filename = video_url.split('/')[-1] + video_id, extension = video_filename.split('.') + + self._downloader.increment_downloads() + info = { + 'id': video_id, + 'url': video_url, + 'uploader': None, + 'upload_date': None, + 'title': video_title, + 'stitle': _simplify_title(video_title), + 'ext': extension, + 'format': extension, # Extension is always(?) mp4, but seems to be flv + 'thumbnail': None, + 'description': video_description, + 'player_url': None, + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download ' + video_url) + + + class PostProcessor(object): """Post Processor class. 
@@ -3877,11 +4006,13 @@ def gen_extractors(): EscapistIE(), CollegeHumorIE(), XVideosIE(), + SoundcloudIE(), + InfoQIE(), GenericIE() ] -def main(): +def _real_main(): parser, opts, args = parseOpts() # Open appropriate CookieJar @@ -4041,10 +4172,9 @@ def main(): sys.exit(retcode) - -if __name__ == '__main__': +def main(): try: - main() + _real_main() except DownloadError: sys.exit(1) except SameFileError: @@ -4052,4 +4182,7 @@ if __name__ == '__main__': except KeyboardInterrupt: sys.exit(u'\nERROR: Interrupted by user') +if __name__ == '__main__': + main() + # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: diff --git a/youtube-dl.spec b/youtube-dl.spec index 49df96f..86ec086 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2011.10.19 +Version: 2011.11.23 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -37,6 +37,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %changelog +* Thu Dec 08 2011 Till Maas <opensource@till.name> - 2011.11.23-1 +- Update to new release (fixed Red Hat Bug #758679) + * Fri Oct 21 2011 Till Maas <opensource@till.name> - 2011.10.19-1 - Update to latest release From 559d3257eafd762c384b2cc3db003f615e8cc80c Mon Sep 17 00:00:00 2001 From: Till Maas <opensource@till.name> Date: Fri, 9 Dec 2011 00:02:34 +0100 Subject: [PATCH 044/279] update mode --- youtube-dl | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 youtube-dl diff --git a/youtube-dl b/youtube-dl old mode 100644 new mode 100755 From 3cae609957ca74a44f171a495af8e4881dafbe73 Mon Sep 17 00:00:00 2001 From: Till Maas <opensource@till.name> Date: Sat, 10 Dec 2011 13:13:29 +0100 Subject: [PATCH 045/279] Update to new release --- youtube-dl | 352 ++++++++++++++++++++++++++++++++++++++++++++---- youtube-dl.spec | 5 +- 2 files changed, 333 insertions(+), 24 deletions(-) diff --git a/youtube-dl b/youtube-dl index 
e6b7be1..6a60334 100755 --- a/youtube-dl +++ b/youtube-dl @@ -14,10 +14,11 @@ __author__ = ( 'Sören Schulze', 'Kevin Ngo', 'Ori Avtalion', + 'shizeeg', ) __license__ = 'Public Domain' -__version__ = '2011.11.23' +__version__ = '2011.12.08' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' @@ -281,6 +282,14 @@ def _simplify_title(title): expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE) return expr.sub(u'_', title).strip(u'_') +def _orderedSet(iterable): + """ Remove all duplicates from the input iterable """ + res = [] + for el in iterable: + if el not in res: + res.append(el) + return res + class DownloadError(Exception): """Download Error exception. @@ -308,6 +317,10 @@ class PostProcessingError(Exception): """ pass +class MaxDownloadsReached(Exception): + """ --max-downloads limit has been reached. """ + pass + class UnavailableVideoError(Exception): """Unavailable Format exception. @@ -698,8 +711,31 @@ class FileDownloader(object): self.trouble(u'ERROR: invalid system charset or erroneous output template') return None + def _match_entry(self, info_dict): + """ Returns None iff the file should be downloaded """ + + title = info_dict['title'] + matchtitle = self.params.get('matchtitle', False) + if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + rejecttitle = self.params.get('rejecttitle', False) + if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + return None + def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" + + reason = self._match_entry(info_dict) + if reason is not None: + self.to_screen(u'[download] ' + reason) + return + + max_downloads = self.params.get('max_downloads') + if max_downloads is not None: + if self._num_downloads > int(max_downloads): + raise MaxDownloadsReached() + 
filename = self.prepare_filename(info_dict) # Forced printings @@ -723,16 +759,6 @@ class FileDownloader(object): if filename is None: return - matchtitle=self.params.get('matchtitle',False) - rejecttitle=self.params.get('rejecttitle',False) - title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle)) - return - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle)) - return - if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists and will be skipped') return @@ -1095,6 +1121,7 @@ class YoutubeIE(InfoExtractor): _NETRC_MACHINE = 'youtube' # Listed in order of quality _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] + _available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -1344,10 +1371,11 @@ class YoutubeIE(InfoExtractor): url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) - if format_limit is not None and format_limit in self._available_formats: - format_list = self._available_formats[self._available_formats.index(format_limit):] + available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats + if format_limit is not None and format_limit in available_formats: + format_list = available_formats[available_formats.index(format_limit):] else: - format_list = self._available_formats + format_list = available_formats existing_formats = [x for x in format_list if x in url_map] if len(existing_formats) == 0: 
self._downloader.trouble(u'ERROR: no known formats available for video') @@ -1603,7 +1631,7 @@ class DailymotionIE(InfoExtractor): video_url = mediaURL - mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?', webpage) + mobj = re.search(r'(?im)\s*(.+)\s*-\s*Video\s+Dailymotion', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return @@ -3608,6 +3636,245 @@ class InfoQIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download ' + video_url) +class MixcloudIE(InfoExtractor): + """Information extractor for www.mixcloud.com""" + _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' + IE_NAME = u'mixcloud' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_json(self, file_id): + """Report JSON download.""" + self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id)) + + def get_urls(self, jsonData, fmt, bitrate='best'): + """Get urls from 'audio_formats' section in json""" + file_url = None + try: + bitrate_list = jsonData[fmt] + if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: + bitrate = max(bitrate_list) # select highest + + url_list = jsonData[fmt][bitrate] + except TypeError: # we have no bitrate info. 
+ url_list = jsonData[fmt] + + return url_list + + def check_urls(self, url_list): + """Returns 1st active url from list""" + for url in url_list: + try: + urllib2.urlopen(url) + return url + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + url = None + + return None + + def _print_formats(self, formats): + print 'Available formats:' + for fmt in formats.keys(): + for b in formats[fmt]: + try: + ext = formats[fmt][b][0] + print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]) + except TypeError: # we have no bitrate info + ext = formats[fmt][0] + print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]) + break + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + # extract uploader & filename from url + uploader = mobj.group(1).decode('utf-8') + file_id = uploader + "-" + mobj.group(2).decode('utf-8') + + # construct API request + file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' + # retrieve .json file with links to files + request = urllib2.Request(file_url) + try: + self.report_download_json(file_url) + jsonData = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err)) + return + + # parse JSON + json_data = json.loads(jsonData) + player_url = json_data['player_swf_url'] + formats = dict(json_data['audio_formats']) + + req_format = self._downloader.params.get('format', None) + bitrate = None + + if self._downloader.params.get('listformats', None): + self._print_formats(formats) + return + + if req_format is None or req_format == 'best': + for format_param in formats.keys(): + url_list = self.get_urls(formats, format_param) + # check urls + file_url = self.check_urls(url_list) + if file_url is not None: + break # got it! 
+ else: + if req_format not in formats.keys(): + self._downloader.trouble(u'ERROR: format is not available') + return + + url_list = self.get_urls(formats, req_format) + file_url = self.check_urls(url_list) + format_param = req_format + + # We have audio + self._downloader.increment_downloads() + try: + # Process file information + self._downloader.process_info({ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': uploader.decode('utf-8'), + 'upload_date': u'NA', + 'title': json_data['name'], + 'stitle': _simplify_title(json_data['name']), + 'ext': file_url.split('.')[-1].decode('utf-8'), + 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'thumbnail': json_data['thumbnail_url'], + 'description': json_data['description'], + 'player_url': player_url.decode('utf-8'), + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download file') + +class StanfordOpenClassroomIE(InfoExtractor): + """Information extractor for Stanford's Open ClassRoom""" + + _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P