From 65b5d97a7de5ff869c63caaa901bf458af8d2fb1 Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Wed, 17 Feb 2010 03:34:46 +0000 Subject: [PATCH 1/9] Initialize branch F-13 for youtube-dl --- branch | 1 + 1 file changed, 1 insertion(+) create mode 100644 branch diff --git a/branch b/branch new file mode 100644 index 0000000..baa94ef --- /dev/null +++ b/branch @@ -0,0 +1 @@ +F-13 From 6f9ab6ce2d00988571b27175a1990397a0b4755b Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 29 Apr 2010 09:27:05 +0000 Subject: [PATCH 2/9] update to new release to fix download issues for some videos --- Home | 208 ++++++++------ youtube-dl | 791 +++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 823 insertions(+), 176 deletions(-) diff --git a/Home b/Home index 25f453b..1be19d9 100644 --- a/Home +++ b/Home @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
@@ -519,7 +525,7 @@ Piston 0.2.3rc1 / Hg 1.3.1 / Python 2.5.2 / - r2890| fe01 + r2988| fe01
diff --git a/index.html b/index.html index 25f453b..1be19d9 100644 --- a/index.html +++ b/index.html @@ -8,11 +8,13 @@ - - + + + + - - + + - + - + +
@@ -489,8 +483,8 @@ Django 1.2.1 / Piston 0.2.3rc1 / Hg 1.3.1 / - Python 2.5.2 / - r3065| fe01 + Python 2.7.0 / + r3099| bfg02
diff --git a/youtube-dl b/youtube-dl index e89c915..5fd331e 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -131,7 +131,7 @@ class PostProcessingError(Exception): """ pass -class UnavailableFormatError(Exception): +class UnavailableVideoError(Exception): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -401,7 +401,7 @@ class FileDownloader(object): try: self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableFormatError + raise UnavailableVideoError # Forced printings if self.params.get('forcetitle', False): @@ -435,7 +435,7 @@ class FileDownloader(object): try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: - raise UnavailableFormatError + raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return @@ -684,12 +684,12 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 
'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - # Listed in order of priority for the -b option - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] + # Listed in order of quality + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -812,124 +812,109 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - - # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() video_id = mobj.group(2) - # Downloader parameters - best_quality = False - all_formats = False - format_param = None - quality_index = 0 - if self._downloader is not None: - params = self._downloader.params - format_param = params.get('format', None) - if format_param == '0': - format_limit = params.get('format_limit', None) - if format_limit is not None: - try: - # Start at a different format if the user has limited the maximum quality - quality_index = self._available_formats.index(format_limit) - except ValueError: - pass - format_param = self._available_formats[quality_index] - best_quality = True - elif format_param == '-1': - format_param = self._available_formats[quality_index] - all_formats = True - - while True: - # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return - # Get video webpage - 
self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + + # Get video info + self.report_video_info_webpage_download(video_id) + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + request = urllib2.Request(video_info_url, None, std_headers) try: - video_webpage = urllib2.urlopen(request).read() + video_info_webpage = urllib2.urlopen(request).read() + video_info = parse_qs(video_info_webpage) + if 'token' in video_info: + break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + self.report_information_extraction(video_id) - # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + # uploader + if 'author' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = urllib.unquote_plus(video_info['author'][0]) + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote_plus(video_info['title'][0]) + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail_url' not in video_info: + 
self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # description + video_description = 'No description available.' + if self._downloader.params.get('forcedescription', False): + mobj = re.search(r'', video_webpage) if mobj is not None: - player_url = mobj.group(1) + video_description = mobj.group(1) + + # Decide which formats to download + if 'fmt_url_map' in video_info: + url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + format_limit = self._downloader.params.get('format_limit', None) + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] else: - player_url = None - - # Get video info - self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) - try: - video_info_webpage = urllib2.urlopen(request).read() - video_info = parse_qs(video_info_webpage) - if 'token' in video_info: - break - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - if 'token' not in video_info: - # Attempt to see if YouTube has issued an error message - if 'reason' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') - stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') - stream.write(video_info_webpage) - stream.close() - else: - reason = urllib.unquote_plus(video_info['reason'][0]) - 
self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') return - token = urllib.unquote_plus(video_info['token'][0]) - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - - # Check possible RTMP download - if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_real_url = video_info['conn'][0] - - # uploader - if 'author' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = urllib.unquote_plus(video_info['author'][0]) + requested_format = self._downloader.params.get('format', None) + if requested_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif requested_format == '-1': + video_url_list = url_map.items() # All formats + else: + if requested_format not in existing_formats: + self._downloader.trouble(u'ERROR: format not available for video') + return + video_url_list = [(requested_format, url_map[requested_format])] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + else: + self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + return - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = urllib.unquote_plus(video_info['title'][0]) - video_title = video_title.decode('utf-8') - video_title = sanitize_title(video_title) - - # simplified title - 
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') - - # thumbnail image - if 'thumbnail_url' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: # don't panic if we can't find it - video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) - - # description - video_description = 'No description available.' - if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + for format_param, video_real_url in video_url_list: + # At this point we have a new video + self._downloader.increment_downloads() + + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -944,32 +929,8 @@ class YoutubeIE(InfoExtractor): 'description': video_description.decode('utf-8'), 'player_url': player_url, }) - - if all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # None left to get - return - else: - format_param = self._available_formats[quality_index] - continue - return - - except UnavailableFormatError, err: - if best_quality or all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # I don't ever expect this to happen - if not all_formats: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - else: - self.report_unavailable_format(video_id, format_param) - format_param = self._available_formats[quality_index] - continue - else: - self._downloader.trouble('ERROR: format not available for video') - return + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class MetacafeIE(InfoExtractor): @@ -1043,8 +1004,7 @@ class MetacafeIE(InfoExtractor): 
return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1101,8 +1061,8 @@ class MetacafeIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1136,8 +1096,7 @@ class DailymotionIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') @@ -1190,8 +1149,8 @@ class DailymotionIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1224,8 +1183,7 @@ class GoogleIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1300,8 +1258,8 @@ class GoogleIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1335,8 +1293,7 @@ class PhotobucketIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + 
self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1382,8 +1339,8 @@ class PhotobucketIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1420,8 +1377,7 @@ class YahooIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None and new_video: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1540,8 +1496,8 @@ class YahooIE(InfoExtractor): 'description': video_description, 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1568,8 +1524,7 @@ class GenericIE(InfoExtractor): def _real_extract(self, url): # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = url.split('/')[-1] request = urllib2.Request(url) @@ -1640,8 +1595,8 @@ class GenericIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): @@ -2109,7 +2064,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.06.06', + version='2010.07.22', conflict_handler='resolve', ) @@ -2138,16 +2093,12 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') 
video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-b', '--best-quality', - action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', action='store_const', dest='format', help='alias for -f 17', const='17') - video_format.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') diff --git a/youtube-dl.spec b/youtube-dl.spec index fd32319..410ed8b 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.14 +Version: 2010.07.22 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Fri Jul 23 2010 Till Maas - 2010.07.21-1 +- Update to latest release + * Thu Jul 15 2010 Till Maas - 2010.07.14-1 - Update to latest release From 271285f7e7520d2f4607b2b6aea5327ad4d0d17b Mon Sep 17 00:00:00 2001 From: Till Maas Date: Fri, 23 Jul 2010 14:14:44 +0000 Subject: [PATCH 7/9] fix version in changelog --- youtube-dl.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl.spec b/youtube-dl.spec index 410ed8b..ac85c2f 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -39,7 +39,7 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog -* Fri 
Jul 23 2010 Till Maas - 2010.07.21-1 +* Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release * Thu Jul 15 2010 Till Maas - 2010.07.14-1 From d4285850f8f9a8df0524dd93f76437f633636fb6 Mon Sep 17 00:00:00 2001 From: Fedora Release Engineering Date: Thu, 29 Jul 2010 16:21:53 +0000 Subject: [PATCH 8/9] dist-git conversion --- .cvsignore => .gitignore | 0 Makefile | 21 --------------------- branch | 1 - 3 files changed, 22 deletions(-) rename .cvsignore => .gitignore (100%) delete mode 100644 Makefile delete mode 100644 branch diff --git a/.cvsignore b/.gitignore similarity index 100% rename from .cvsignore rename to .gitignore diff --git a/Makefile b/Makefile deleted file mode 100644 index e163bdb..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile for source rpm: youtube-dl -# $Id: Makefile,v 1.1 2008/01/25 19:47:50 kevin Exp $ -NAME := youtube-dl -SPECFILE = $(firstword $(wildcard *.spec)) - -define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done -endef - -MAKEFILE_COMMON := $(shell $(find-makefile-common)) - -ifeq ($(MAKEFILE_COMMON),) -# attept a checkout -define checkout-makefile-common -test -f CVS/Root && { cvs -Q -d $$(cat CVS/Root) checkout common && echo "common/Makefile.common" ; } || { echo "ERROR: I can't figure out how to checkout the 'common' module." 
; exit -1 ; } >&2 -endef - -MAKEFILE_COMMON := $(shell $(checkout-makefile-common)) -endif - -include $(MAKEFILE_COMMON) diff --git a/branch b/branch deleted file mode 100644 index baa94ef..0000000 --- a/branch +++ /dev/null @@ -1 +0,0 @@ -F-13 From 5e7d1a098c8cc7e7226b4f762b641d944a2eb92f Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 5 Aug 2010 13:40:16 +0200 Subject: [PATCH 9/9] Update to latest release remove unused index.html --- Home | 54 ++--- index.html | 527 ------------------------------------------------ youtube-dl | 147 +++++++++----- youtube-dl.spec | 5 +- 4 files changed, 123 insertions(+), 610 deletions(-) delete mode 100644 index.html diff --git a/Home b/Home index aa132b1..e7bce9a 100644 --- a/Home +++ b/Home @@ -53,7 +53,7 @@ - + - - - - - - - - - - - - - - - -
-
- -
-
- - - - - - - - - - - - - - - - - - -
- -
- -
- - - -
- -
- - -
-
-
- - - - -

- rg3 / - youtube-dl - (http://bitbucket.org/rg3/youtube-dl/wiki/) - -

- - - - - -

youtube-dl is a small command-line program for downloading videos from YouTube.com.

- -
Clone this repository (size: 255.5 KB): HTTPS / SSH
-
$ hg clone http://bitbucket.org/rg3/youtube-dl
- -
- -
- - - - -
- - - - - - - -
- -
-
- - - -
-



-

youtube-dl: Download videos from YouTube.com

-

(and more...)

-

What is it?

-

youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.04.04. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

-

I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

-

Thanks for all the feedback received so far. I'm glad people find my program useful.

-

Usage instructions

-

In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

-

In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

-

After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

-

If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

-

More usage tips

-
  • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". Read the Output template section for more details on this. -
  • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password for a YouTube.com account with the -u and -p options, like youtube-dl -u myusername -p mypassword "http://www.youtube.com/watch?v=foobar". -
  • The account data can also be read from the user .netrc file by indicating the -n or --netrc option. The machine name is youtube in that case. -
  • The simulate mode (activated with -s or --simulate) can be used to just get the real video URL and use it with a download manager if you prefer that option. -
  • The quiet mode (activated with -q or --quiet) can be used to suppress all output messages. This allows, in systems featuring /dev/stdout and other similar special files, outputting the video data to standard output in order to pipe it to another program without interference. -
  • The program can be told to simply print the final video URL to standard output using the -g or --get-url option. -
  • In a similar line, the -e or --get-title option tells the program to print the video title. -
  • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the literal title in the filename with the -l or --literal option. -
  • You can make the program append &fmt=something to the URL by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
  • The -b or --best-quality option can be used to download the highest available quality version of any given video. -
  • The -m or --mobile-version option is an alias for -f 17. -
  • The -d or --high-def option is an alias for -f 22. -
  • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option. -
  • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line. -
  • The program can be told not to overwrite existing files using the -w or --no-overwrites option. -
  • It can be told to attempt to continue interrupted downloads with the -c or --continue option. -
  • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist. -
  • For YouTube, you can also use the special word ytsearch to download search results. With ytsearch it will download the first search result. With ytsearchN, where N is a number, it will download the first N results. With ytsearchall it will download every result for that search. In most systems you'll need to use quotes for multiple words. Example: youtube-dl "ytsearch3:cute kittens". -
  • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// prefix out. -
  • You can get the program version by calling it as youtube-dl -v or youtube-dl --version. -
  • For usage instructions, use youtube-dl -h or youtube-dl --help. -
  • You can cancel the program at any time pressing Ctrl+C. It may print some error lines saying something about KeyboardInterrupt. That's ok. -
-

Download it

-

Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to right-click on it and choose the appropriate option, normally called Save Target As or Save Link As, depending on the web browser you are using.

-

2010.04.04

-
  • MD5: 6c3e2982b5a46ede4fd5d6e6ed280331 -
  • SHA1: 2edaa9a95f26bbd1e871314f3fd5e8e87b1bd576 -
  • SHA256: 69aa7757feb01c5faf68414a0220bed1e9210900059b1a7d7c7f2c53e788ae26 -
-

Output template

-

The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in youtube-dl -o funny_video.flv "http://some/video". However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format %(NAME)s. To clarify, that's a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:

-
  • id: The sequence will be replaced by the video identifier. -
  • url: The sequence will be replaced by the video URL. -
  • uploader: The sequence will be replaced by the nickname of the person who uploaded the video. -
  • title: The sequence will be replaced by the literal video title. -
  • stitle: The sequence will be replaced by a simplified video title, restricted to alphanumeric characters and dashes. -
  • ext: The sequence will be replaced by the appropriate extension (like flv or mp4). -
  • epoch: The sequence will be replaced by the Unix epoch when creating the file. -
  • ord: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. -
-

As you may have guessed, the default template is %(id)s.%(ext)s. When some command line options are used, it's replaced by other templates like %(title)s-%(id)s.%(ext)s. You can specify your own.

-

Authors

-
  • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor. -
  • Danny Colligan: YouTube search InfoExtractor, ideas and patches. -
  • Benjamin Johnson: Google Video InfoExtractor, Photobucket InfoExtractor, Yahoo! Video InfoExtractor, generic InfoExtractor, ideas, patches, etc. -
  • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much. -
-

Copyright © 2006-2010 Ricardo Garcia Gonzalez

- - - -
- - -
-
- -
- - - diff --git a/youtube-dl b/youtube-dl index 5fd331e..d546949 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,50 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case of HTTP error 
503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < long(content_length) < resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
+ self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -844,6 +845,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader @@ -878,7 +887,13 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download + requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) + if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) @@ -890,19 +905,17 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = 
[(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return @@ -930,7 +943,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): @@ -1026,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + #self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: @@ -1928,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + 
playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1983,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2064,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.08.04', conflict_handler='resolve', ) @@ -2080,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2099,6 +2124,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2151,6 +2178,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2171,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2212,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) 
fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index ac85c2f..0e3e705 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.07.22 +Version: 2010.08.04 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Aug 05 2010 Till Maas - 2010.08.04-1 +- Update to latest release + * Fri Jul 23 2010 Till Maas - 2010.07.22-1 - Update to latest release