From efe36ab29de46bbf199a040f003ec4beb4ec732f Mon Sep 17 00:00:00 2001 From: Till Maas Date: Thu, 15 Jul 2010 18:01:08 +0000 Subject: [PATCH] - Update to latest release --- Home | 139 ++++++++++++++++++----------------------- youtube-dl | 160 +++++++++++++++++++++++++++++++++++++++++++----- youtube-dl.spec | 5 +- 3 files changed, 207 insertions(+), 97 deletions(-) diff --git a/Home b/Home index 24651a9..cf1d5c0 100644 --- a/Home +++ b/Home @@ -9,12 +9,12 @@ - - + +
@@ -86,29 +72,13 @@
-
-
-
- - - -
-
-
-
- - -
-
-
-
@@ -207,7 +177,7 @@
  • - Issues (10) » + Issues (12) »
  • @@ -265,9 +235,9 @@ @@ -283,7 +253,7 @@ @@ -293,7 +263,9 @@ @@ -378,9 +348,9 @@ @@ -401,7 +371,7 @@

    youtube-dl is a small command-line program for downloading videos from YouTube.com.

    -
    Clone this repository (size: 269.0 KB): HTTPS / SSH
    +
    Clone this repository (size: 287.0 KB): HTTPS / SSH
    $ hg clone http://bitbucket.org/rg3/youtube-dl
    @@ -443,46 +413,41 @@

    youtube-dl: Download videos from YouTube.com

    (and more...)

    What is it?

    -

    youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.06.06. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

    +

    youtube-dl is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 4), and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version is 2010.07.14. It's released to the public domain, which means you can modify it, redistribute it or use it however you like.

    I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't guarantee anything. If you detect it stops working, check for new versions and/or inform me about the problem, indicating the program version you are using. If the program stops working and I can't solve the problem but you have a solution, I'd like to know it. If that happens and you feel you can maintain the program yourself, tell me. My contact information is at rg03.wordpress.com.

    Thanks for all the feedback received so far. I'm glad people find my program useful.

    Usage instructions

    In Windows, once you have installed the Python interpreter, save the program with the .py extension and put it somewhere in the PATH. Try to follow the guide to install youtube-dl under Windows XP.

    In Unix, download it, give it execution permission and copy it to one of the PATH directories (typically, /usr/local/bin).

    -

    After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

    +

    After that, you should be able to call it from the command line as youtube-dl or youtube-dl.py. I will use youtube-dl in the following examples. Usage instructions are easy. Use youtube-dl followed by a video URL or identifier. Example: youtube-dl "http://www.youtube.com/watch?v=foobar". The video will be saved to the file foobar.flv in that example. As YouTube.com videos are usually in Flash Video format, their extension should be flv. In Linux and other unices, video players using a recent version of ffmpeg can play them. That includes MPlayer, VLC, etc. Those two work under Windows and other platforms, but you could also get a specific FLV player of your taste.

    If you try to run the program and you receive an error message containing the keyword SyntaxError near the end, it means your Python interpreter is too old.

    More usage tips

    -
    + + + diff --git a/youtube-dl b/youtube-dl index 01a61ba..e89c915 100644 --- a/youtube-dl +++ b/youtube-dl @@ -27,9 +27,9 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', } @@ -99,7 +99,7 @@ def sanitize_open(filename, open_mode): return (stream, filename) except (IOError, OSError), err: # In case of error, try to remove win32 forbidden chars - filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename) + filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename) # An exception here should be caught in the caller stream = open(filename, open_mode) @@ -189,6 +189,7 @@ class FileDownloader(object): forcetitle: Force printing title. simulate: Do not download the video files. format: Video format code. + format_limit: Highest quality format to try. outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. @@ -386,6 +387,10 @@ class FileDownloader(object): self.to_stdout(u'[download] Download completed') else: self.to_stdout(u'') + + def increment_downloads(self): + """Increment the ordinal that assigns a number to each file.""" + self._num_downloads += 1 def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -555,7 +560,6 @@ class FileDownloader(object): if content_length is not None and long(content_length) == resume_len: # Because the file had already been fully downloaded self.report_file_already_downloaded(filename) - self._num_downloads += 1 return True else: # Because the server didn't let us @@ -582,7 +586,6 @@ class FileDownloader(object): try: (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) - self._num_downloads += 1 except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False @@ -680,18 +683,20 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag + # Listed in order of priority for the -b option + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] _video_extensions = { '13': '3gp', '17': 'mp4', '18': 'mp4', '22': 'mp4', '37': 'mp4', + '38': 'video', # You actually don't know if this will be MOV, AVI or whatever '43': 'webm', '45': 'webm', } @@ -807,6 +812,10 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(2) # Downloader parameters @@ -818,6 +827,13 @@ class YoutubeIE(InfoExtractor): params = self._downloader.params format_param = params.get('format', None) if format_param == '0': + format_limit = params.get('format_limit', None) + if format_limit is not None: + try: + # Start at a different format if the user has limited the maximum quality + quality_index = self._available_formats.index(format_limit) + except ValueError: + pass format_param = self._available_formats[quality_index] best_quality = True elif format_param == '-1': @@ -838,7 +854,7 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: player_url = mobj.group(1) else: @@ -1026,6 +1042,10 @@ class MetacafeIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1085,6 +1105,94 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class DailymotionIE(InfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DailymotionIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + video_id = mobj.group(1) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + # if needed add http://www.dailymotion.com/ if relative URL + + video_url = mediaURL + + # '' + mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + + mobj = re.search(r'(?im)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1115,6 +1223,9 @@ class GoogleIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1223,6 +1334,9 @@ class PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1298,13 +1412,16 @@ class YahooIE(InfoExtractor): def _real_initialize(self): return - def _real_extract(self, url): + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None and new_video: + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1331,7 +1448,7 @@ class YahooIE(InfoExtractor): yahoo_vid = mobj.group(1) url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) - return self._real_extract(url) + return self._real_extract(url, new_video=False) # Retrieve video webpage to extract further information request = urllib2.Request(url) @@ -1450,6 +1567,10 @@ class GenericIE(InfoExtractor): return def _real_extract(self, url): + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + video_id = url.split('/')[-1] request = urllib2.Request(url) try: @@ -2001,22 +2122,22 @@ if __name__ == '__main__': parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-R', '--retries', - dest='retries', metavar='T', help='number of retries (default is 10)', default=10) + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', - dest='username', metavar='UN', help='account username') + dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', - dest='password', metavar='PW', help='account password') + dest='password', metavar='PASSWORD', help='account password') authentication.add_option('-n', '--netrc', action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) parser.add_option_group(authentication) video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FMT', help='video format code') + action='store', dest='format', metavar='FORMAT', help='video format code') video_format.add_option('-b', '--best-quality', action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', @@ -2025,6 +2146,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2050,9 +2173,9 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TPL', help='output filename template') + dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -2101,6 +2224,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() youtube_pl_ie = YoutubePlaylistIE(youtube_ie) youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) @@ -2123,6 +2247,7 @@ if __name__ == '__main__': 'forcedescription': opts.getdescription, 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'format': opts.format, + 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') @@ -2141,6 +2266,7 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) fd.add_info_extractor(google_search_ie) diff --git a/youtube-dl.spec b/youtube-dl.spec index 53607b5..fd32319 100644 --- a/youtube-dl.spec +++ b/youtube-dl.spec @@ -1,5 +1,5 @@ Name: youtube-dl -Version: 2010.06.06 +Version: 2010.07.14 Release: 1%{?dist} Summary: Small command-line program to download videos from YouTube Summary(pl): Tekstowy program do pobierania filmów z youtube.com @@ -39,6 +39,9 @@ rm -rf $RPM_BUILD_ROOT %doc index.html %changelog +* Thu Jul 15 2010 Till Maas - 2010.07.14-1 +- Update to latest release + * Mon Jun 07 2010 Till Maas - 2010.06.06-1 - Update to latest release