python-urlgrabber/urlgrabber-HEAD.patch

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1ffe416
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+*.py[co]
+MANIFEST
+dist
+build
+*.kdev*
+*.kateproject
+ipython.log*
diff --git a/scripts/urlgrabber b/scripts/urlgrabber
index 518e512..09cd896 100644
--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
@@ -115,6 +115,7 @@ options:
including quotes in the case of strings.
e.g. --user_agent='"foobar/2.0"'
+ --output FILE
-o FILE write output to FILE, otherwise the basename of the
url will be used
-O print the names of saved files to STDOUT
@@ -170,12 +171,17 @@ class client_options:
return ug_options, ug_defaults
def process_command_line(self):
- short_options = 'vd:hoOpD'
+ short_options = 'vd:ho:OpD'
long_options = ['profile', 'repeat=', 'verbose=',
- 'debug=', 'help', 'progress']
+ 'debug=', 'help', 'progress', 'output=']
ug_long = [ o + '=' for o in self.ug_options ]
- optlist, args = getopt.getopt(sys.argv[1:], short_options,
- long_options + ug_long)
+ try:
+ optlist, args = getopt.getopt(sys.argv[1:], short_options,
+ long_options + ug_long)
+ except getopt.GetoptError, e:
+ print >>sys.stderr, "Error:", e
+ self.help([], ret=1)
+
self.verbose = 0
self.debug = None
self.outputfile = None
@@ -193,6 +199,7 @@ class client_options:
if o == '--verbose': self.verbose = v
if o == '-v': self.verbose += 1
if o == '-o': self.outputfile = v
+ if o == '--output': self.outputfile = v
if o == '-p' or o == '--progress': self.progress = 1
if o == '-d' or o == '--debug': self.debug = v
if o == '--profile': self.profile = 1
@@ -222,7 +229,7 @@ class client_options:
print "ERROR: cannot use -o when grabbing multiple files"
sys.exit(1)
- def help(self, args):
+ def help(self, args, ret=0):
if not args:
print MAINHELP
else:
@@ -234,7 +241,7 @@ class client_options:
self.help_ug_option(a)
else:
print 'ERROR: no help on command "%s"' % a
- sys.exit(0)
+ sys.exit(ret)
def help_doc(self):
print __doc__
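
The new --output long option added above is a synonym for the existing -o flag; for example (paths and URL are illustrative):

    urlgrabber --output /tmp/foo.iso http://example.com/foo.iso
    urlgrabber -o /tmp/foo.iso http://example.com/foo.iso
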
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
index 0000000..3da55a4
--- /dev/null
+++ b/scripts/urlgrabber-ext-down
@@ -0,0 +1,72 @@
+#! /usr/bin/python
+# A very simple external downloader
+# Copyright 2011-2012 Zdenek Pavlas
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+import time, os, errno, sys
+from urlgrabber.grabber import \
+ _readlines, URLGrabberOptions, _loads, \
+ PyCurlFileObject, URLGrabError
+
+def write(fmt, *arg):
+ try: os.write(1, fmt % arg)
+ except OSError, e:
+ if e.args[0] != errno.EPIPE: raise
+ sys.exit(1)
+
+class ProxyProgress:
+ def start(self, *d1, **d2):
+ self.next_update = 0
+ def update(self, _amount_read):
+ t = time.time()
+ if t < self.next_update: return
+ self.next_update = t + 0.31
+ write('%d %d\n', self._id, _amount_read)
+
+def main():
+ import signal
+ signal.signal(signal.SIGINT, lambda n, f: sys.exit(1))
+ cnt = 0
+ while True:
+ lines = _readlines(0)
+ if not lines: break
+ for line in lines:
+ cnt += 1
+ opts = URLGrabberOptions()
+ opts._id = cnt
+ for k in line.split(' '):
+ k, v = k.split('=', 1)
+ setattr(opts, k, _loads(v))
+ if opts.progress_obj:
+ opts.progress_obj = ProxyProgress()
+ opts.progress_obj._id = cnt
+ tm = time.time()
+ try:
+ fo = PyCurlFileObject(opts.url, opts.filename, opts)
+ fo._do_grab()
+ fo.fo.close()
+ size = fo._amount_read
+ dlsz = size - fo._reget_length
+ ug_err = 'OK'
+ except URLGrabError, e:
+ size = dlsz = 0
+ ug_err = '%d %s' % e.args
+ write('%d %d %d %.3f %s\n', opts._id, size, dlsz, time.time() - tm, ug_err)
+
+if __name__ == '__main__':
+ main()
diff --git a/setup.py b/setup.py
index d0b87b8..bfa4a18 100644
--- a/setup.py
+++ b/setup.py
@@ -15,8 +15,10 @@ url = _urlgrabber.__url__
packages = ['urlgrabber']
package_dir = {'urlgrabber':'urlgrabber'}
scripts = ['scripts/urlgrabber']
-data_files = [('share/doc/' + name + '-' + version,
- ['README','LICENSE', 'TODO', 'ChangeLog'])]
+data_files = [
+ ('share/doc/' + name + '-' + version, ['README','LICENSE', 'TODO', 'ChangeLog']),
+ ('libexec', ['scripts/urlgrabber-ext-down']),
+]
options = { 'clean' : { 'all' : 1 } }
classifiers = [
'Development Status :: 4 - Beta',
diff --git a/test/base_test_code.py b/test/base_test_code.py
index 50c6348..5fb43f9 100644
--- a/test/base_test_code.py
+++ b/test/base_test_code.py
@@ -1,6 +1,6 @@
from munittest import *
-base_http = 'http://www.linux.duke.edu/projects/urlgrabber/test/'
+base_http = 'http://urlgrabber.baseurl.org/test/'
base_ftp = 'ftp://localhost/test/'
# set to a proftp server only. we're working around a couple of
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
+++ b/urlgrabber/byterange.py
@@ -68,7 +68,7 @@ class HTTPRangeHandler(urllib2.BaseHandler):
def http_error_416(self, req, fp, code, msg, hdrs):
# HTTP's Range Not Satisfiable error
- raise RangeError('Requested Range Not Satisfiable')
+ raise RangeError(9, 'Requested Range Not Satisfiable')
class HTTPSRangeHandler(HTTPRangeHandler):
""" Range Header support for HTTPS. """
@@ -208,7 +208,7 @@ class RangeableFileObject:
bufsize = offset - pos
buf = self.fo.read(bufsize)
if len(buf) != bufsize:
- raise RangeError('Requested Range Not Satisfiable')
+ raise RangeError(9, 'Requested Range Not Satisfiable')
pos+= bufsize
class FileRangeHandler(urllib2.FileHandler):
@@ -238,7 +238,7 @@ class FileRangeHandler(urllib2.FileHandler):
(fb,lb) = brange
if lb == '': lb = size
if fb < 0 or fb > size or lb > size:
- raise RangeError('Requested Range Not Satisfiable')
+ raise RangeError(9, 'Requested Range Not Satisfiable')
size = (lb - fb)
fo = RangeableFileObject(fo, (fb,lb))
headers = mimetools.Message(StringIO(
@@ -318,12 +318,12 @@ class FTPRangeHandler(urllib2.FTPHandler):
(fb,lb) = range_tup
if lb == '':
if retrlen is None or retrlen == 0:
- raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
+ raise RangeError(9, 'Requested Range Not Satisfiable due to unobtainable file length.')
lb = retrlen
retrlen = lb - fb
if retrlen < 0:
# beginning of range is larger than file
- raise RangeError('Requested Range Not Satisfiable')
+ raise RangeError(9, 'Requested Range Not Satisfiable')
else:
retrlen = lb - fb
fp = RangeableFileObject(fp, (0,retrlen))
@@ -458,6 +458,6 @@ def range_tuple_normalize(range_tup):
# check if range is over the entire file
if (fb,lb) == (0,''): return None
# check that the range is valid
- if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
+ if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb))
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index e090e90..daa478d 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
progress_obj = None
a class instance that supports the following methods:
- po.start(filename, url, basename, length, text)
+ po.start(filename, url, basename, size, now, text)
# length will be None if unknown
po.update(read) # read == bytes read so far
po.end()
+ multi_progress_obj = None
+
+ a class instance that supports the following methods:
+ mo.start(total_files, total_size)
+ mo.newMeter() => meter
+ mo.removeMeter(meter)
+ mo.end()
+
+ The 'meter' object is similar to progress_obj, but multiple
+ instances may be created and updated at the same time.
+
+ When downloading multiple files in parallel and multi_progress_obj
+ is None progress_obj is used in compatibility mode: finished files
+ are shown but there's no in-progress display.
+
text = None
specifies alternative text to be passed to the progress meter
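
A minimal wiring sketch for these two options, assuming the TextMeter and TextMultiFileMeter classes from urlgrabber.progress serve as the single- and multi-file meters (multi_progress_obj only comes into play for the queued parallel downloads described under 'async' below):

    from urlgrabber.grabber import URLGrabber
    from urlgrabber.progress import TextMeter, TextMultiFileMeter

    g = URLGrabber(progress_obj=TextMeter(),
                   multi_progress_obj=TextMultiFileMeter(threaded=False))
    g.urlgrab('http://example.com/file.iso', '/tmp/file.iso')
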
@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs)
(which can be set on default_grabber.throttle) is used. See
BANDWIDTH THROTTLING for more information.
- timeout = None
+ timeout = 300
- a positive float expressing the number of seconds to wait for socket
- operations. If the value is None or 0.0, socket operations will block
- forever. Setting this option causes urlgrabber to call the settimeout
- method on the Socket object used for the request. See the Python
- documentation on settimeout for more information.
- http://www.python.org/doc/current/lib/socket-objects.html
+ a positive integer expressing the number of seconds to wait before
+ timing out attempts to connect to a server. If the value is None
+ or 0, connection attempts will not time out. The timeout is passed
+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
bandwidth = 0
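
For example (a sketch; the URL is illustrative), per the documentation above a value of 0 or None disables the timeout:

    from urlgrabber.grabber import urlgrab
    urlgrab('http://example.com/big.iso', timeout=30)  # 30 second connect timeout
    urlgrab('http://example.com/big.iso', timeout=0)   # connection attempts do not time out
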
@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs)
note that proxy authentication information may be provided using
normal URL constructs:
proxies={ 'http' : 'http://user:host@foo:3128' }
- Lastly, if proxies is None, the default environment settings will
- be used.
+
+ libproxy = False
+
+ Use the libproxy module (if installed) to find proxies.
+ The libproxy code is only used if the proxies dictionary
+ does not provide any proxies.
prefix = None
@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs)
control, you should probably subclass URLParser and pass it in via
the 'urlparser' option.
+ username = None
+ username to use for simple http auth - is automatically quoted for special characters
+
+ password = None
+ password to use for simple http auth - is automatically quoted for special characters
+
ssl_ca_cert = None
this option can be used if M2Crypto is available and will be
@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs)
No-op when using the curl backend (default)
- self.ssl_verify_peer = True
+ ssl_verify_peer = True
Check the server's certificate to make sure it is valid with what our CA validates
- self.ssl_verify_host = True
+ ssl_verify_host = True
Check the server's hostname to make sure it matches the certificate DN
- self.ssl_key = None
+ ssl_key = None
Path to the key the client should use to connect/authenticate with
- self.ssl_key_type = 'PEM'
+ ssl_key_type = 'PEM'
PEM or DER - format of key
- self.ssl_cert = None
+ ssl_cert = None
Path to the ssl certificate the client should use to to authenticate with
- self.ssl_cert_type = 'PEM'
+ ssl_cert_type = 'PEM'
PEM or DER - format of certificate
- self.ssl_key_pass = None
+ ssl_key_pass = None
password to access the ssl_key
- self.size = None
+ size = None
size (in bytes) or Maximum size of the thing being downloaded.
This is mostly to keep us from exploding with an endless datastream
- self.max_header_size = 2097152
+ max_header_size = 2097152
Maximum size (in bytes) of the headers.
+ ip_resolve = 'whatever'
+
+ What type of name to IP resolving to use, default is to do both IPV4 and
+ IPV6.
+
+ async = (key, limit)
+
+ When this option is set, the urlgrab() is not processed immediately
+ but queued. parallel_wait() then processes grabs in parallel, limiting
+ the number of connections in each 'key' group to at most 'limit'.
+
+ max_connections
+
+ The global connection limit.
+
+ timedhosts
+
+ The filename of the host download statistics. If defined, urlgrabber
+ will update the stats at the end of every download. At the end of
+ parallel_wait(), the updated stats are saved. If synchronous grabs
+ are used, you should call th_save().
+
+ default_speed, half_life
+
+ These options only affect the async mirror selection code.
+ The default_speed option sets the speed estimate for mirrors
+ we have never downloaded from, and defaults to 1 MBps.
+
+ The speed estimate also drifts exponentially from the speed
+ actually measured to the default speed, with default
+ period of 30 days.
+
RETRY RELATED ARGUMENTS
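
A sketch of the queued download path built on the async option documented above (URLs and the key are illustrative; parallel_wait() is the helper this patch adds to drain the queue):

    from urlgrabber.grabber import urlgrab, parallel_wait

    for name in ('a.rpm', 'b.rpm', 'c.rpm'):
        # returns immediately; the grab is queued under the 'example.com' key
        urlgrab('http://example.com/repo/' + name, name,
                async=('example.com', 2))   # at most 2 connections for this key
    parallel_wait()                         # process the whole queue in parallel
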
@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS
but it cannot (without severe trickiness) prevent the exception
from being raised.
+ failfunc = None
+
+ The callback that gets called when urlgrab request fails.
+ If defined, urlgrab() calls it instead of raising URLGrabError.
+ Callback syntax is identical to failure_callback.
+
+ Contrary to failure_callback, it's called only once. Its primary
+ purpose is to use urlgrab() without a try/except block.
+
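
A sketch of failfunc replacing the usual try/except around urlgrab() (the callback name and URL are illustrative):

    from urlgrabber.grabber import urlgrab

    def failed(obj):
        # obj.exception is the URLGrabError that would otherwise have been raised
        print 'download failed:', obj.exception

    urlgrab('http://example.com/missing.iso', failfunc=failed)
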
interrupt_callback = None
This callback is called if KeyboardInterrupt is received at any
@@ -420,6 +486,7 @@ import time
import string
import urllib
import urllib2
+from httplib import responses
import mimetools
import thread
import types
@@ -428,9 +495,17 @@ import pycurl
from ftplib import parse150
from StringIO import StringIO
from httplib import HTTPException
-import socket
+import socket, select, fcntl
from byterange import range_tuple_normalize, range_tuple_to_header, RangeError
+try:
+ import xattr
+ if not hasattr(xattr, 'set'):
+ xattr = None # This is a "newer" API.
+except ImportError:
+ xattr = None
+
+
########################################################################
# MODULE INITIALIZATION
########################################################################
@@ -439,6 +514,12 @@ try:
except:
__version__ = '???'
+try:
+ # this part isn't going to do much - need to talk to gettext
+ from i18n import _
+except ImportError, msg:
+ def _(st): return st
+
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
@@ -527,6 +608,22 @@ def _(st):
# END MODULE INITIALIZATION
########################################################################
+########################################################################
+# UTILITY FUNCTIONS
+########################################################################
+
+# These functions are meant to be utilities for the urlgrabber library to use.
+
+def _to_utf8(obj, errors='replace'):
+ '''convert 'unicode' to an encoded utf-8 byte string '''
+ # stolen from yum.i18n
+ if isinstance(obj, unicode):
+ obj = obj.encode('utf-8', errors)
+ return obj
+
+########################################################################
+# END UTILITY FUNCTIONS
+########################################################################
class URLGrabError(IOError):
@@ -662,6 +759,7 @@ class URLParser:
opts.quote = 0 --> do not quote it
opts.quote = None --> guess
"""
+ url = _to_utf8(url)
quote = opts.quote
if opts.prefix:
@@ -768,6 +866,41 @@ class URLGrabberOptions:
else: # throttle is a float
return self.bandwidth * self.throttle
+ def find_proxy(self, url, scheme):
+ """Find the proxy to use for this URL.
+ Use the proxies dictionary first, then libproxy.
+ """
+ self.proxy = None
+ if scheme not in ('ftp', 'http', 'https'):
+ return
+
+ if self.proxies:
+ proxy = self.proxies.get(scheme)
+ if proxy is None:
+ if scheme == 'http':
+ proxy = self.proxies.get('https')
+ elif scheme == 'https':
+ proxy = self.proxies.get('http')
+ if proxy == '_none_':
+ proxy = ''
+ self.proxy = proxy
+ return
+
+ if self.libproxy:
+ global _libproxy_cache
+ if _libproxy_cache is None:
+ try:
+ import libproxy
+ _libproxy_cache = libproxy.ProxyFactory()
+ except:
+ _libproxy_cache = False
+ if _libproxy_cache:
+ for proxy in _libproxy_cache.getProxies(url):
+ if proxy.startswith('http://'):
+ if DEBUG: DEBUG.info('using proxy "%s" for url %s' % (proxy, url))
+ self.proxy = proxy
+ break
+
def derive(self, **kwargs):
"""Create a derived URLGrabberOptions instance.
This method creates a new instance and overrides the
@@ -791,30 +924,37 @@ class URLGrabberOptions:
provided here.
"""
self.progress_obj = None
+ self.multi_progress_obj = None
self.throttle = 1.0
self.bandwidth = 0
self.retry = None
self.retrycodes = [-1,2,4,5,6,7]
self.checkfunc = None
+ self.failfunc = _do_raise
self.copy_local = 0
self.close_connection = 0
self.range = None
self.user_agent = 'urlgrabber/%s' % __version__
+ self.ip_resolve = None
self.keepalive = 1
self.proxies = None
+ self.libproxy = False
+ self.proxy = None
self.reget = None
self.failure_callback = None
self.interrupt_callback = None
self.prefix = None
self.opener = None
self.cache_openers = True
- self.timeout = None
+ self.timeout = 300
self.text = None
self.http_headers = None
self.ftp_headers = None
self.data = None
self.urlparser = URLParser()
self.quote = None
+ self.username = None
+ self.password = None
self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
self.ssl_context = None # no-op in pycurl
self.ssl_verify_peer = True # check peer's cert for authenticityb
@@ -827,6 +967,12 @@ class URLGrabberOptions:
self.size = None # if we know how big the thing we're getting is going
# to be. this is ultimately a MAXIMUM size for the file
self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
+ self.async = None # blocking by default
+ self.mirror_group = None
+ self.max_connections = 5
+ self.timedhosts = None
+ self.half_life = 30*24*60*60 # 30 days
+ self.default_speed = 1e6 # 1 MBit
def __repr__(self):
return self.format()
@@ -846,7 +992,18 @@ class URLGrabberOptions:
s = s + indent + '}'
return s
-class URLGrabber:
+def _do_raise(obj):
+ raise obj.exception
+
+def _run_callback(cb, obj):
+ if not cb:
+ return
+ if callable(cb):
+ return cb(obj)
+ cb, arg, karg = cb
+ return cb(obj, *arg, **karg)
+
+class URLGrabber(object):
"""Provides easy opening of URLs with a variety of options.
All options are specified as kwargs. Options may be specified when
@@ -872,7 +1029,6 @@ class URLGrabber:
# beware of infinite loops :)
tries = tries + 1
exception = None
- retrycode = None
callback = None
if DEBUG: DEBUG.info('attempt %i/%s: %s',
tries, opts.retry, args[0])
@@ -883,54 +1039,62 @@ class URLGrabber:
except URLGrabError, e:
exception = e
callback = opts.failure_callback
- retrycode = e.errno
except KeyboardInterrupt, e:
exception = e
callback = opts.interrupt_callback
+ if not callback:
+ raise
if DEBUG: DEBUG.info('exception: %s', exception)
if callback:
if DEBUG: DEBUG.info('calling callback: %s', callback)
- cb_func, cb_args, cb_kwargs = self._make_callback(callback)
obj = CallbackObject(exception=exception, url=args[0],
tries=tries, retry=opts.retry)
- cb_func(obj, *cb_args, **cb_kwargs)
+ _run_callback(callback, obj)
if (opts.retry is None) or (tries == opts.retry):
if DEBUG: DEBUG.info('retries exceeded, re-raising')
raise
+ retrycode = getattr(exception, 'errno', None)
if (retrycode is not None) and (retrycode not in opts.retrycodes):
if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
retrycode, opts.retrycodes)
raise
- def urlopen(self, url, **kwargs):
+ def urlopen(self, url, opts=None, **kwargs):
"""open the url and return a file object
If a progress object or throttle value specified when this
object was created, then a special file object will be
returned that supports them. The file object can be treated
like any other file object.
"""
- opts = self.opts.derive(**kwargs)
+ url = _to_utf8(url)
+ opts = (opts or self.opts).derive(**kwargs)
if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
(url,parts) = opts.urlparser.parse(url, opts)
+ opts.find_proxy(url, parts[0])
def retryfunc(opts, url):
return PyCurlFileObject(url, filename=None, opts=opts)
return self._retry(opts, retryfunc, url)
- def urlgrab(self, url, filename=None, **kwargs):
+ def urlgrab(self, url, filename=None, opts=None, **kwargs):
"""grab the file at <url> and make a local copy at <filename>
If filename is none, the basename of the url is used.
urlgrab returns the filename of the local file, which may be
different from the passed-in filename if copy_local == 0.
"""
- opts = self.opts.derive(**kwargs)
+ url = _to_utf8(url)
+ opts = (opts or self.opts).derive(**kwargs)
if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
(url,parts) = opts.urlparser.parse(url, opts)
(scheme, host, path, parm, query, frag) = parts
+ opts.find_proxy(url, scheme)
if filename is None:
filename = os.path.basename( urllib.unquote(path) )
+ if not filename:
+ # This is better than nothing.
+ filename = 'index.html'
if scheme == 'file' and not opts.copy_local:
# just return the name of the local file - don't make a
# copy currently
@@ -950,41 +1114,49 @@ class URLGrabber:
elif not opts.range:
if not opts.checkfunc is None:
- cb_func, cb_args, cb_kwargs = \
- self._make_callback(opts.checkfunc)
- obj = CallbackObject()
- obj.filename = path
- obj.url = url
- apply(cb_func, (obj, )+cb_args, cb_kwargs)
+ obj = CallbackObject(filename=path, url=url)
+ _run_callback(opts.checkfunc, obj)
return path
+ if opts.async:
+ opts.url = url
+ opts.filename = filename
+ opts.size = int(opts.size or 0)
+ _async_queue.append(opts)
+ return filename
+
def retryfunc(opts, url, filename):
+ tm = time.time()
fo = PyCurlFileObject(url, filename, opts)
try:
fo._do_grab()
+ _TH.update(url, fo._amount_read - fo._reget_length, time.time() - tm, None)
if not opts.checkfunc is None:
- cb_func, cb_args, cb_kwargs = \
- self._make_callback(opts.checkfunc)
- obj = CallbackObject()
- obj.filename = filename
- obj.url = url
- apply(cb_func, (obj, )+cb_args, cb_kwargs)
+ obj = CallbackObject(filename=filename, url=url)
+ _run_callback(opts.checkfunc, obj)
finally:
fo.close()
return filename
- return self._retry(opts, retryfunc, url, filename)
+ try:
+ return self._retry(opts, retryfunc, url, filename)
+ except URLGrabError, e:
+ _TH.update(url, 0, 0, e)
+ opts.exception = e
+ return _run_callback(opts.failfunc, opts)
- def urlread(self, url, limit=None, **kwargs):
+ def urlread(self, url, limit=None, opts=None, **kwargs):
"""read the url into a string, up to 'limit' bytes
If the limit is exceeded, an exception will be thrown. Note
that urlread is NOT intended to be used as a way of saying
"I want the first N bytes" but rather 'read the whole file
into memory, but don't use too much'
"""
- opts = self.opts.derive(**kwargs)
+ url = _to_utf8(url)
+ opts = (opts or self.opts).derive(**kwargs)
if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
(url,parts) = opts.urlparser.parse(url, opts)
+ opts.find_proxy(url, parts[0])
if limit is not None:
limit = limit + 1
@@ -1000,12 +1172,8 @@ class URLGrabber:
else: s = fo.read(limit)
if not opts.checkfunc is None:
- cb_func, cb_args, cb_kwargs = \
- self._make_callback(opts.checkfunc)
- obj = CallbackObject()
- obj.data = s
- obj.url = url
- apply(cb_func, (obj, )+cb_args, cb_kwargs)
+ obj = CallbackObject(data=s, url=url)
+ _run_callback(opts.checkfunc, obj)
finally:
fo.close()
return s
@@ -1020,6 +1188,7 @@ class URLGrabber:
return s
def _make_callback(self, callback_obj):
+ # not used, left for compatibility
if callable(callback_obj):
return callback_obj, (), {}
else:
@@ -1030,7 +1199,7 @@ class URLGrabber:
default_grabber = URLGrabber()
-class PyCurlFileObject():
+class PyCurlFileObject(object):
def __init__(self, url, filename, opts):
self.fo = None
self._hdr_dump = ''
@@ -1052,10 +1221,11 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
- self.size = None
+ self.size = 0
+ self._hdr_ended = False
self._do_open()
-
+
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
Any attribute not found in _this_ object will be searched for
@@ -1085,9 +1255,14 @@ class PyCurlFileObject():
return -1
def _hdr_retrieve(self, buf):
+ if self._hdr_ended:
+ self._hdr_dump = ''
+ self.size = 0
+ self._hdr_ended = False
+
if self._over_max_size(cur=len(self._hdr_dump),
max_size=self.opts.max_header_size):
- return -1
+ return -1
try:
self._hdr_dump += buf
# we have to get the size before we do the progress obj start
@@ -1104,7 +1279,17 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
-
+
+ if buf.lower().find('location') != -1:
+ location = ':'.join(buf.split(':')[1:])
+ location = location.strip()
+ self.scheme = urlparse.urlsplit(location)[0]
+ self.url = location
+
+ if len(self._hdr_dump) != 0 and buf == '\r\n':
+ self._hdr_ended = True
+ if DEBUG: DEBUG.info('header ended:')
+
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
@@ -1113,8 +1298,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
+ statusend += 1 # ridiculous as it may seem.
hdrfp = StringIO()
hdrfp.write(self._hdr_dump[statusend:])
+ hdrfp.seek(0)
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
@@ -1127,6 +1314,9 @@ class PyCurlFileObject():
if not opts:
opts = self.opts
+ # keepalives
+ if not opts.keepalive:
+ self.curl_obj.setopt(pycurl.FORBID_REUSE, 1)
# defaults we're always going to set
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
@@ -1136,11 +1326,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
if DEBUG:
self.curl_obj.setopt(pycurl.VERBOSE, True)
if opts.user_agent:
self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
+ if opts.ip_resolve:
+ # Default is: IPRESOLVE_WHATEVER
+ ipr = opts.ip_resolve.lower()
+ if ipr == 'whatever': # Do we need this?
+ self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
+ if ipr == 'ipv4':
+ self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+ if ipr == 'ipv6':
+ self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
@@ -1148,9 +1348,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
- if opts.timeout:
- timeout = int(opts.timeout)
- self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+ if hasattr(opts, 'timeout'):
+ timeout = int(opts.timeout or 0)
+ self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)
+ self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
# ssl options
if self.scheme == 'https':
@@ -1158,13 +1360,16 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
- self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host)
+ if opts.ssl_verify_host: # 1 is meaningless to curl
+ self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, 2)
if opts.ssl_key:
self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key)
if opts.ssl_key_type:
self.curl_obj.setopt(pycurl.SSLKEYTYPE, opts.ssl_key_type)
if opts.ssl_cert:
self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert)
+ # if we have a client side cert - turn off reuse b/c nss is odd
+ self.curl_obj.setopt(pycurl.FORBID_REUSE, 1)
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
@@ -1187,28 +1392,26 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
- # proxy settings
- if opts.proxies:
- for (scheme, proxy) in opts.proxies.items():
- if self.scheme in ('ftp'): # only set the ftp proxy for ftp items
- if scheme not in ('ftp'):
- continue
- else:
- if proxy == '_none_': proxy = ""
- self.curl_obj.setopt(pycurl.PROXY, proxy)
- elif self.scheme in ('http', 'https'):
- if scheme not in ('http', 'https'):
- continue
- else:
- if proxy == '_none_': proxy = ""
- self.curl_obj.setopt(pycurl.PROXY, proxy)
-
- # FIXME username/password/auth settings
+ # proxy
+ if opts.proxy is not None:
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
+ self.curl_obj.setopt(pycurl.PROXYAUTH,
+ # All but Kerberos. BZ 769254
+ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
+
+ if opts.username and opts.password:
+ if self.scheme in ('http', 'https'):
+ self.curl_obj.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
+
+ if opts.username and opts.password:
+ # apparently when applying them as curlopts they do not require quoting of any kind
+ userpwd = '%s:%s' % (opts.username, opts.password)
+ self.curl_obj.setopt(pycurl.USERPWD, userpwd)
#posts - simple - expects the fields as they are
if opts.data:
self.curl_obj.setopt(pycurl.POST, True)
- self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
+ self.curl_obj.setopt(pycurl.POSTFIELDS, _to_utf8(opts.data))
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
@@ -1228,12 +1431,14 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
+ errurl = urllib.unquote(self.url)
+
if self._error[0]:
errcode = self._error[0]
if errcode == 23 and code >= 200 and code < 299:
- err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
- err.url = self.url
+ err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
+ err.url = errurl
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
@@ -1244,23 +1449,23 @@ class PyCurlFileObject():
raise KeyboardInterrupt
elif errcode == 28:
- err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
+ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+ err.url = errurl
raise err
elif errcode == 35:
msg = _("problem making ssl connection")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 37:
- msg = _("Could not open/read %s") % (self.url)
+ msg = _("Could not open/read %s") % (errurl)
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 42:
- err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
- err.url = self.url
+ err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
+ err.url = errurl
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
@@ -1272,33 +1477,94 @@ class PyCurlFileObject():
elif errcode == 58:
msg = _("problem with the local client certificate")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 60:
- msg = _("client cert cannot be verified or client cert incorrect")
+ msg = _("Peer cert cannot be verified or peer cert invalid")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 63:
if self._error[1]:
msg = self._error[1]
else:
- msg = _("Max download size exceeded on %s") % (self.url)
+ msg = _("Max download size exceeded on %s") % (errurl)
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+ if self.scheme in ['http', 'https']:
+ if self.http_code in responses:
+ resp = responses[self.http_code]
+ msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
+ else:
+ msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
+ elif self.scheme in ['ftp']:
+ msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
+ else:
+ msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+ pyerr2str = { 5 : _("Couldn't resolve proxy"),
+ 6 : _("Couldn't resolve host"),
+ 7 : _("Couldn't connect"),
+ 8 : _("Bad reply to FTP server"),
+ 9 : _("Access denied"),
+ 11 : _("Bad reply to FTP pass"),
+ 13 : _("Bad reply to FTP pasv"),
+ 14 : _("Bad reply to FTP 227"),
+ 15 : _("Couldn't get FTP host"),
+ 17 : _("Couldn't set FTP type"),
+ 18 : _("Partial file"),
+ 19 : _("FTP RETR command failed"),
+ 22 : _("HTTP returned error"),
+ 23 : _("Write error"),
+ 25 : _("Upload failed"),
+ 26 : _("Read error"),
+ 27 : _("Out of Memory"),
+ 28 : _("Operation timed out"),
+ 30 : _("FTP PORT command failed"),
+ 31 : _("FTP REST command failed"),
+ 33 : _("Range failed"),
+ 34 : _("HTTP POST failed"),
+ 35 : _("SSL CONNECT failed"),
+ 36 : _("Couldn't resume download"),
+ 37 : _("Couldn't read file"),
+ 42 : _("Aborted by callback"),
+ 47 : _("Too many redirects"),
+ 51 : _("Peer certificate failed verification"),
+ 52 : _("Got nothing: SSL certificate expired?"),
+ 53 : _("SSL engine not found"),
+ 54 : _("SSL engine set failed"),
+ 55 : _("Network error send()"),
+ 56 : _("Network error recv()"),
+ 58 : _("Local certificate failed"),
+ 59 : _("SSL set cipher failed"),
+ 60 : _("Local CA certificate failed"),
+ 61 : _("HTTP bad transfer encoding"),
+ 63 : _("Maximum file size exceeded"),
+ 64 : _("FTP SSL failed"),
+ 67 : _("Authentication failure"),
+ 70 : _("Out of disk space on server"),
+ 73 : _("Remove file exists"),
+ }
+ errstr = str(e.args[1])
+ if not errstr:
+ errstr = pyerr2str.get(errcode, '<Unknown>')
+ msg = 'curl#%s - "%s"' % (errcode, errstr)
code = errcode
err = URLGrabError(14, msg)
err.code = code
err.exception = e
raise err
+ else:
+ if self._error[1]:
+ msg = self._error[1]
+ err = URLGrabError(14, msg)
+ err.url = urllib.unquote(self.url)
+ raise err
def _do_open(self):
self.curl_obj = _curl_cache
@@ -1333,7 +1599,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
- if rt[0]: rt = (rt[0] + reget_length, rt[1])
+
+ if rt[0] is None:
+ rt = (0, rt[1])
+ rt = (rt[0] + reget_length, rt[1])
+
if rt:
header = range_tuple_to_header(rt)
@@ -1434,21 +1704,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
-
- self._do_perform()
-
-
-
+ try:
+ self._do_perform()
+ except URLGrabError, e:
+ self.fo.flush()
+ self.fo.close()
+ raise e
+
if _was_filename:
# close it up
self.fo.flush()
self.fo.close()
+
+ # Set the URL where we got it from:
+ if xattr is not None:
+ # See: http://www.freedesktop.org/wiki/CommonExtendedAttributes
+ try:
+ xattr.set(self.filename, 'user.xdg.origin.url', self.url)
+ except:
+ pass # URL too long. = IOError ... ignore everything.
+
# set the time
mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
if mod_time != -1:
- os.utime(self.filename, (mod_time, mod_time))
+ try:
+ os.utime(self.filename, (mod_time, mod_time))
+ except OSError, e:
+ err = URLGrabError(16, _(\
+ 'error setting timestamp on file %s from %s, OSError: %s')
+ % (self.filename, self.url, e))
+ err.url = self.url
+ raise err
# re open it
- self.fo = open(self.filename, 'r')
+ try:
+ self.fo = open(self.filename, 'r')
+ except IOError, e:
+ err = URLGrabError(16, _(\
+ 'error opening file from %s, IOError: %s') % (self.url, e))
+ err.url = self.url
+ raise err
+
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
@@ -1526,17 +1821,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
- except KeyboardInterrupt:
+ except (KeyboardInterrupt, IOError):
return -1
def _over_max_size(self, cur, max_size=None):
if not max_size:
- max_size = self.size
- if self.opts.size: # if we set an opts size use that, no matter what
- max_size = self.opts.size
+ if not self.opts.size:
+ max_size = self.size
+ else:
+ max_size = self.opts.size
+
if not max_size: return False # if we have None for all of the Max then this is dumb
- if cur > max_size + max_size*.10:
+
+ if cur > int(float(max_size) * 1.10):
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
@@ -1544,13 +1842,6 @@ class PyCurlFileObject():
return True
return False
- def _to_utf8(self, obj, errors='replace'):
- '''convert 'unicode' to an encoded utf-8 byte string '''
- # stolen from yum.i18n
- if isinstance(obj, unicode):
- obj = obj.encode('utf-8', errors)
- return obj
-
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
@@ -1582,9 +1873,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
-
+ def geturl(self):
+ """ Provide the geturl() method, used to be got from
+ urllib.addinfourl, via. urllib.URLopener.* """
+ return self.url
+
_curl_cache = pycurl.Curl() # make one and reuse it over and over and over
+def reset_curl_obj():
+ """To make sure curl has reread the network/dns info we force a reload"""
+ global _curl_cache
+ _curl_cache.close()
+ _curl_cache = pycurl.Curl()
+
+_libproxy_cache = None
+
#####################################################################
# DEPRECATED FUNCTIONS
@@ -1621,6 +1924,460 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
+# Serializer + parser: A replacement of the rather bulky Json code.
+#
+# - handles basic python literals, lists and tuples.
+# - serialized strings never contain ' ' or '\n'
+#
+#####################################################################
+
+_quoter_map = {}
+for c in '%[(,)] \n':
+ _quoter_map[c] = '%%%02x' % ord(c)
+del c
+
+def _dumps(v):
+ if v is None: return 'None'
+ if v is True: return 'True'
+ if v is False: return 'False'
+ if type(v) in (int, long, float):
+ return str(v)
+ if type(v) == unicode:
+ v = v.encode('UTF8')
+ if type(v) == str:
+ def quoter(c): return _quoter_map.get(c, c)
+ return "'%s'" % ''.join(map(quoter, v))
+ if type(v) == tuple:
+ return "(%s)" % ','.join(map(_dumps, v))
+ if type(v) == list:
+ return "[%s]" % ','.join(map(_dumps, v))
+ raise TypeError, 'Can\'t serialize %s' % v
+
+def _loads(s):
+ def decode(v):
+ if v == 'None': return None
+ if v == 'True': return True
+ if v == 'False': return False
+ try: return int(v)
+ except ValueError: pass
+ try: return float(v)
+ except ValueError: pass
+ if len(v) >= 2 and v[0] == v[-1] == "'":
+ ret = []; i = 1
+ while True:
+ j = v.find('%', i)
+ ret.append(v[i:j]) # skips the final "'"
+ if j == -1: break
+ ret.append(chr(int(v[j + 1:j + 3], 16)))
+ i = j + 3
+ v = ''.join(ret)
+ return v
+ stk = None
+ l = []
+ i = j = 0
+ while True:
+ if j == len(s) or s[j] in ',)]':
+ if j > i:
+ l.append(decode(s[i:j]))
+ if j == len(s): break
+ if s[j] in ')]':
+ if s[j] == ')':
+ l = tuple(l)
+ stk[0].append(l)
+ l, stk = stk
+ i = j = j + 1
+ elif s[j] in '[(':
+ stk = l, stk
+ l = []
+ i = j = j + 1
+ else:
+ j += 1 # safe because '[(,)]' are quoted
+ if stk: raise ValueError
+ if len(l) == 1: l = l[0]
+ return l
+
+
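
A quick round-trip illustration of the serializer (values chosen arbitrarily):

    from urlgrabber.grabber import _dumps, _loads

    s = _dumps(['file name', (1, 2.5), None, True])
    # s == "['file%20name',(1,2.5),None,True]" -- no spaces or newlines
    assert _loads(s) == ['file name', (1, 2.5), None, True]
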
+#####################################################################
+# External downloader process
+#####################################################################
+
+def _readlines(fd):
+ buf = os.read(fd, 4096)
+ if not buf: return None
+ # whole lines only, no buffering
+ while buf[-1] != '\n':
+ buf += os.read(fd, 4096)
+ return buf[:-1].split('\n')
+
+import subprocess
+
+class _ExternalDownloader:
+ def __init__(self):
+ self.popen = subprocess.Popen(
+ '/usr/libexec/urlgrabber-ext-down',
+ stdin = subprocess.PIPE,
+ stdout = subprocess.PIPE,
+ )
+ self.stdin = self.popen.stdin.fileno()
+ self.stdout = self.popen.stdout.fileno()
+ self.running = {}
+ self.cnt = 0
+
+ # list of options we pass to downloader
+ _options = (
+ 'url', 'filename',
+ 'timeout', 'close_connection', 'keepalive',
+ 'throttle', 'bandwidth', 'range', 'reget',
+ 'user_agent', 'http_headers', 'ftp_headers',
+ 'proxy', 'prefix', 'username', 'password',
+ 'ssl_ca_cert',
+ 'ssl_cert', 'ssl_cert_type',
+ 'ssl_key', 'ssl_key_type',
+ 'ssl_key_pass',
+ 'ssl_verify_peer', 'ssl_verify_host',
+ 'size', 'max_header_size', 'ip_resolve',
+ )
+
+ def start(self, opts):
+ arg = []
+ for k in self._options:
+ v = getattr(opts, k)
+ if v is None: continue
+ arg.append('%s=%s' % (k, _dumps(v)))
+ if opts.progress_obj and opts.multi_progress_obj:
+ arg.append('progress_obj=True')
+ arg = ' '.join(arg)
+ if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
+
+ self.cnt += 1
+ self.running[self.cnt] = opts
+ os.write(self.stdin, arg +'\n')
+
+ def perform(self):
+ ret = []
+ lines = _readlines(self.stdout)
+ if not lines:
+ if DEBUG: DEBUG.info('downloader died')
+ raise KeyboardInterrupt
+ for line in lines:
+ # parse downloader output
+ line = line.split(' ', 5)
+ _id, size = map(int, line[:2])
+ if len(line) == 2:
+ self.running[_id]._progress.update(size)
+ continue
+ # job done
+ opts = self.running.pop(_id)
+ if line[4] == 'OK':
+ ug_err = None
+ if DEBUG: DEBUG.info('success')
+ else:
+ ug_err = URLGrabError(int(line[4]), line[5])
+ if DEBUG: DEBUG.info('failure: %s', ug_err)
+ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
+ ret.append((opts, size, ug_err))
+ return ret
+
+ def abort(self):
+ self.popen.stdin.close()
+ self.popen.stdout.close()
+ self.popen.wait()
+
+class _ExternalDownloaderPool:
+ def __init__(self):
+ self.epoll = select.epoll()
+ self.running = {}
+ self.cache = {}
+
+ def start(self, opts):
+ host = urlparse.urlsplit(opts.url).netloc
+ dl = self.cache.pop(host, None)
+ if not dl:
+ dl = _ExternalDownloader()
+ fl = fcntl.fcntl(dl.stdin, fcntl.F_GETFD)
+ fcntl.fcntl(dl.stdin, fcntl.F_SETFD, fl | fcntl.FD_CLOEXEC)
+ self.epoll.register(dl.stdout, select.EPOLLIN)
+ self.running[dl.stdout] = dl
+ dl.start(opts)
+
+ def perform(self):
+ ret = []
+ for fd, event in self.epoll.poll():
+ if event & select.EPOLLHUP:
+ if DEBUG: DEBUG.info('downloader died')
+ raise KeyboardInterrupt
+ assert event & select.EPOLLIN
+ done = self.running[fd].perform()
+ if not done: continue
+ assert len(done) == 1
+ ret.extend(done)
+
+ # dl finished, move it to the cache
+ host = urlparse.urlsplit(done[0][0].url).netloc
+ if host in self.cache: self.cache[host].abort()
+ self.epoll.unregister(fd)
+ self.cache[host] = self.running.pop(fd)
+ return ret
+
+ def abort(self):
+ for dl in self.running.values():
+ self.epoll.unregister(dl.stdout)
+ dl.abort()
+ for dl in self.cache.values():
+ dl.abort()
+
+
+#####################################################################
+# High level async API
+#####################################################################
+
+_async_queue = []
+
+def parallel_wait(meter=None):
+ '''Process queued requests in parallel.
+ '''
+
+ # calculate total sizes
+ meters = {}
+ for opts in _async_queue:
+ if opts.progress_obj and opts.multi_progress_obj:
+ count, total = meters.get(opts.multi_progress_obj) or (0, 0)
+ meters[opts.multi_progress_obj] = count + 1, total + opts.size
+
+ # start multi-file meters
+ for meter in meters:
+ count, total = meters[meter]
+ meter.start(count, total)
+
+ dl = _ExternalDownloaderPool()
+ host_con = {} # current host connection counts
+
+ def start(opts, tries):
+ key, limit = opts.async
+ host_con[key] = host_con.get(key, 0) + 1
+ opts.tries = tries
+ if opts.progress_obj:
+ if opts.multi_progress_obj:
+ opts._progress = opts.multi_progress_obj.newMeter()
+ opts._progress.start(text=opts.text)
+ else:
+ opts._progress = time.time() # no updates
+ if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
+ dl.start(opts)
+
+ def perform():
+ for opts, size, ug_err in dl.perform():
+ key, limit = opts.async
+ host_con[key] -= 1
+ if opts.progress_obj:
+ if opts.multi_progress_obj:
+ opts.multi_progress_obj.re.total += size - opts.size # correct totals
+ opts._progress.end(size)
+ opts.multi_progress_obj.removeMeter(opts._progress)
+ else:
+ opts.progress_obj.start(text=opts.text, now=opts._progress)
+ opts.progress_obj.update(size)
+ opts.progress_obj.end(size)
+ del opts._progress
+
+ if ug_err is None:
+ if opts.checkfunc:
+ try: _run_callback(opts.checkfunc, opts)
+ except URLGrabError, ug_err: pass
+ if ug_err is None:
+ continue
+
+ retry = opts.retry or 0
+ if opts.failure_callback:
+ opts.exception = ug_err
+ try: _run_callback(opts.failure_callback, opts)
+ except URLGrabError, ug_err:
+ retry = 0 # no retries
+ if opts.tries < retry and ug_err.errno in opts.retrycodes:
+ start(opts, opts.tries + 1) # simple retry
+ continue
+
+ if opts.mirror_group:
+ mg, errors, failed, removed = opts.mirror_group
+ errors.append((opts.url, str(ug_err)))
+ failed[key] = failed.get(key, 0) + 1
+ opts.mirror = key
+ opts.exception = ug_err
+ action = mg.default_action or {}
+ if mg.failure_callback:
+ opts.tries = len(errors)
+ action = dict(action) # update only the copy
+ action.update(_run_callback(mg.failure_callback, opts))
+ if not action.get('fail', 0):
+ # mask this mirror and retry
+ if action.get('remove', 1):
+ removed.add(key)
+ _async_queue.append(opts)
+ continue
+ # fail=1 from callback
+ ug_err.errors = errors
+
+ # urlgrab failed
+ opts.exception = ug_err
+ _run_callback(opts.failfunc, opts)
+
+ try:
+ idx = 0
+ while True:
+ if idx >= len(_async_queue):
+ # the queue is empty
+ if not dl.running: break
+ # pending dl may extend it
+ perform()
+ continue
+
+ # handle next request
+ opts = _async_queue[idx]
+ idx += 1
+
+ # check global limit
+ while len(dl.running) >= default_grabber.opts.max_connections:
+ perform()
+
+ if opts.mirror_group:
+ mg, errors, failed, removed = opts.mirror_group
+
+ # find the best mirror
+ best = None
+ best_speed = None
+ for mirror in mg.mirrors:
+ key = mirror['mirror']
+ if key in removed: continue
+
+ # estimate mirror speed
+ speed = _TH.estimate(key)
+ speed /= 1 + host_con.get(key, 0)
+
+ # 2-tuple to select mirror with least failures
+ speed = -failed.get(key, 0), speed
+ if best is None or speed > best_speed:
+ best = mirror
+ best_speed = speed
+
+ if best is None:
+ opts.exception = URLGrabError(256, _('No more mirrors to try.'))
+ opts.exception.errors = errors
+ _run_callback(opts.failfunc, opts)
+ continue
+
+ # update the grabber object, apply mirror kwargs
+ grabber = best.get('grabber') or mg.grabber
+ opts.delegate = grabber.opts.derive(**best.get('kwargs', {}))
+
+ # update the current mirror and limit
+ key = best['mirror']
+ limit = best.get('kwargs', {}).get('max_connections', 2)
+ opts.async = key, limit
+
+ # update URL and proxy
+ url = mg._join_url(key, opts.relative_url)
+ url, parts = opts.urlparser.parse(url, opts)
+ opts.find_proxy(url, parts[0])
+ opts.url = url
+
+ # check host limit, then start
+ key, limit = opts.async
+ while host_con.get(key, 0) >= limit:
+ perform()
+ start(opts, 1)
+ except IOError, e:
+ if e.errno != 4: raise
+ raise KeyboardInterrupt
+
+ finally:
+ dl.abort()
+ for meter in meters:
+ meter.end()
+ del _async_queue[:]
+ _TH.save()
+
+
+#####################################################################
+# Host bandwidth estimation
+#####################################################################
+
+class _TH:
+ hosts = {}
+ dirty = None
+
+ @staticmethod
+ def load():
+ filename = default_grabber.opts.timedhosts
+ if filename and _TH.dirty is None:
+ try:
+ for line in open(filename):
+ host, speed, fail, ts = line.rsplit(' ', 3)
+ _TH.hosts[host] = int(speed), int(fail), int(ts)
+ except IOError: pass
+ _TH.dirty = False
+
+ @staticmethod
+ def save():
+ filename = default_grabber.opts.timedhosts
+ if filename and _TH.dirty is True:
+ tmp = '%s.%d' % (filename, os.getpid())
+ try:
+ f = open(tmp, 'w')
+ for host in _TH.hosts:
+ f.write(host + ' %d %d %d\n' % _TH.hosts[host])
+ f.close()
+ os.rename(tmp, filename)
+ except IOError: pass
+ _TH.dirty = False
+
+ @staticmethod
+ def update(url, dl_size, dl_time, ug_err, baseurl=None):
+ # Use hostname from URL. If it's a file:// URL, use baseurl.
+ # If no baseurl, do not update timedhosts.
+ host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
+ if not host: return
+
+ _TH.load()
+ speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
+ now = time.time()
+
+ if ug_err is None:
+ # defer first update if the file was small. BZ 851178.
+ if not ts and dl_size < 1e6: return
+
+ # k1: the older, the less useful
+ # k2: <500ms readings are less reliable
+ # speeds vary, use 10:1 smoothing
+ k1 = 2**((ts - now) / default_grabber.opts.half_life)
+ k2 = min(dl_time / .500, 1.0) / 10
+ if k2 > 0:
+ speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2)
+ fail = 0
+ elif getattr(ug_err, 'code', None) == 404:
+ fail = 0 # alive, at least
+ else:
+ fail += 1 # seems dead
+
+ _TH.hosts[host] = speed, fail, now
+ _TH.dirty = True
+
+ @staticmethod
+ def estimate(baseurl):
+ _TH.load()
+
+ # Use just the hostname, unless it's a file:// baseurl.
+ host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
+
+ default_speed = default_grabber.opts.default_speed
+ try: speed, fail, ts = _TH.hosts[host]
+ except KeyError: return default_speed
+
+ speed *= 2**-fail
+ k = 2**((ts - time.time()) / default_grabber.opts.half_life)
+ speed = k * speed + (1 - k) * default_speed
+ return speed
+
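
A small numeric illustration of how estimate() drifts a stale measurement back toward the default (values are made up; half_life and default_speed as documented above):

    half_life, default_speed = 30*24*60*60, 1e6
    speed, fail = 5e6, 0                    # measured 5 MB/s, no recent failures
    age = 30*24*60*60                       # last sample is one half-life old
    k = 2**(-age / float(half_life))        # == 0.5
    print k*speed + (1 - k)*default_speed   # 3000000.0, halfway back to the default
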
+#####################################################################
# TESTING
def _main_test():
try: url, filename = sys.argv[1:3]
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index dad410b..b17be17 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -76,6 +76,9 @@ CUSTOMIZATION
'grabber' is omitted, the default grabber will be used. If
kwargs are omitted, then (duh) they will not be used.
+ kwarg 'max_connections' limits the number of concurrent
+ connections to this mirror.
+
3) Pass keyword arguments when instantiating the mirror group.
See, for example, the failure_callback argument.
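
A sketch of the per-mirror connection limit passed via the kwargs dict, used by the parallel download path (mirror URLs and the file path are illustrative):

    from urlgrabber.grabber import URLGrabber
    from urlgrabber.mirror import MirrorGroup

    mirrors = [
        {'mirror': 'http://mirror-a.example.com/pub/',
         'kwargs': {'max_connections': 4}},
        {'mirror': 'http://mirror-b.example.com/pub/',
         'kwargs': {'max_connections': 2}},
    ]
    mg = MirrorGroup(URLGrabber(), mirrors)
    mg.urlgrab('path/to/file.rpm', '/tmp/file.rpm')
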
@@ -87,10 +90,12 @@ CUSTOMIZATION
"""
+import sys
import random
import thread # needed for locking to make this threadsafe
-from grabber import URLGrabError, CallbackObject, DEBUG
+from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
+from grabber import _run_callback, _do_raise
def _(st):
return st
@@ -126,7 +131,9 @@ class MirrorGroup:
files)
* if the local list is ever exhausted, a URLGrabError will be
- raised (errno=256, no more mirrors)
+ raised (errno=256, No more mirrors). The 'errors' attribute
+ holds a list of (full_url, errmsg) tuples. This contains
+ all URLs tried and the corresponding error messages.
OPTIONS
@@ -153,7 +160,8 @@ class MirrorGroup:
The 'fail' option will cause immediate failure by re-raising
the exception and no further attempts to get the current
- download.
+ download. As in the "No more mirrors" case, the 'errors'
+ attribute is set in the exception object.
This dict can be set at instantiation time,
mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
@@ -184,6 +192,7 @@ class MirrorGroup:
obj.exception = < exception that was raised >
obj.mirror = < the mirror that was tried >
+ obj.tries = < the number of mirror tries so far >
obj.relative_url = < url relative to the mirror >
obj.url = < full url that failed >
# .url is just the combination of .mirror
@@ -263,7 +272,8 @@ class MirrorGroup:
def _parse_mirrors(self, mirrors):
parsed_mirrors = []
for m in mirrors:
- if type(m) == type(''): m = {'mirror': m}
+ if isinstance(m, basestring):
+ m = {'mirror': _to_utf8(m)}
parsed_mirrors.append(m)
return parsed_mirrors
@@ -280,7 +290,9 @@ class MirrorGroup:
# return a random mirror so that multiple mirrors get used
# even without failures.
if not gr.mirrors:
- raise URLGrabError(256, _('No more mirrors to try.'))
+ e = URLGrabError(256, _('No more mirrors to try.'))
+ e.errors = gr.errors
+ raise e
return gr.mirrors[gr._next]
def _failure(self, gr, cb_obj):
@@ -307,7 +319,9 @@ class MirrorGroup:
a.update(action)
action = a
self.increment_mirror(gr, action)
- if action and action.get('fail', 0): raise
+ if action and action.get('fail', 0):
+ sys.exc_info()[1].errors = gr.errors
+ raise
def increment_mirror(self, gr, action={}):
"""Tell the mirror object increment the mirror index
@@ -377,35 +391,50 @@ class MirrorGroup:
gr.url = url
gr.kw = dict(kw)
self._load_gr(gr)
+ gr.errors = []
for k in self.options:
try: del kw[k]
except KeyError: pass
+ tries = 0
while 1:
+ tries += 1
mirrorchoice = self._get_mirror(gr)
fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
- kwargs = dict(mirrorchoice.get('kwargs', {}))
- kwargs.update(kw)
grabber = mirrorchoice.get('grabber') or self.grabber
+ # apply mirrorchoice kwargs on top of grabber.opts
+ opts = grabber.opts.derive(**mirrorchoice.get('kwargs', {}))
func_ref = getattr(grabber, func)
if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
try:
- return func_ref( *(fullurl,), **kwargs )
+ return func_ref( *(fullurl,), opts=opts, **kw )
except URLGrabError, e:
if DEBUG: DEBUG.info('MIRROR: failed')
+ gr.errors.append((fullurl, str(e)))
obj = CallbackObject()
obj.exception = e
obj.mirror = mirrorchoice['mirror']
obj.relative_url = gr.url
obj.url = fullurl
+ obj.tries = tries
self._failure(gr, obj)
def urlgrab(self, url, filename=None, **kwargs):
kw = dict(kwargs)
kw['filename'] = filename
+ if kw.get('async'):
+ # enable mirror failovers in async path
+ kw['mirror_group'] = self, [], {}, set()
+ kw['relative_url'] = url
+ else:
+ kw.pop('failfunc', None)
func = 'urlgrab'
- return self._mirror_try(func, url, kw)
+ try:
+ return self._mirror_try(func, url, kw)
+ except URLGrabError, e:
+ obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs)
+ return _run_callback(kwargs.get('failfunc', _do_raise), obj)
def urlopen(self, url, **kwargs):
kw = dict(kwargs)
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
index dd07c6a..ad57dbc 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -211,6 +211,21 @@ def text_meter_total_size(size, downloaded=0):
# 4. + ( 5, total: 32)
#
+def _term_add_bar(tl, bar_max_length, pc):
+ blen = bar_max_length
+ bar = '='*int(blen * pc)
+ if (blen * pc) - int(blen * pc) >= 0.5:
+ bar += '-'
+ return tl.add(' [%-*.*s]' % (blen, blen, bar))
+
+def _term_add_end(tl, osize, size):
+ if osize is not None:
+ if size > osize: # Is ??? better? Really need something to say < vs >.
+ return tl.add(' !!! '), True
+ elif size != osize:
+ return tl.add(' ... '), True
+ return tl.add(' ' * 5), False
+
class TextMeter(BaseMeter):
def __init__(self, fo=sys.stderr):
BaseMeter.__init__(self)
@@ -259,13 +274,10 @@ class TextMeter(BaseMeter):
ui_rate = tl.add(' %5sB/s' % ave_dl)
# Make text grow a bit before we start growing the bar too
blen = 4 + tl.rest_split(8 + 8 + 4)
- bar = '='*int(blen * frac)
- if (blen * frac) - int(blen * frac) >= 0.5:
- bar += '-'
- ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar))
- out = '%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
- ui_sofar_pc, ui_pc, ui_bar,
- ui_rate, ui_size, ui_time, ui_end)
+ ui_bar = _term_add_bar(tl, blen, frac)
+ out = '\r%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
+ ui_sofar_pc, ui_pc, ui_bar,
+ ui_rate,ui_size,ui_time, ui_end)
self.fo.write(out)
self.fo.flush()
@@ -284,12 +296,7 @@ class TextMeter(BaseMeter):
tl = TerminalLine(8)
ui_size = tl.add(' | %5sB' % total_size)
ui_time = tl.add(' %9s' % total_time)
- not_done = self.size is not None and amount_read != self.size
- if not_done:
- ui_end = tl.add(' ... ')
- else:
- ui_end = tl.add(' ' * 5)
-
+ ui_end, not_done = _term_add_end(tl, self.size, amount_read)
out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text,
ui_size, ui_time, ui_end)
self.fo.write(out)
@@ -331,12 +338,21 @@ class MultiFileHelper(BaseMeter):
def message(self, message):
self.master.message_meter(self, message)
+class _FakeLock:
+ def acquire(self):
+ pass
+ def release(self):
+ pass
+
class MultiFileMeter:
helperclass = MultiFileHelper
- def __init__(self):
+ def __init__(self, threaded=True):
self.meters = []
self.in_progress_meters = []
- self._lock = thread.allocate_lock()
+ if threaded:
+ self._lock = thread.allocate_lock()
+ else:
+ self._lock = _FakeLock()
self.update_period = 0.3 # seconds
self.numfiles = None
@@ -369,6 +385,7 @@ class MultiFileMeter:
def end(self, now=None):
if now is None: now = time.time()
+ self.re.update(self._amount_read(), now)
self._do_end(now)
def _do_end(self, now):
@@ -466,11 +483,21 @@ class MultiFileMeter:
class TextMultiFileMeter(MultiFileMeter):
- def __init__(self, fo=sys.stderr):
+ def __init__(self, fo=sys.stderr, threaded=True):
self.fo = fo
- MultiFileMeter.__init__(self)
+ MultiFileMeter.__init__(self, threaded)
+ self.index_time = self.index = 0
# files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:##
+# New output, like TextMeter output...
+# update: Size, All files
+# -----------------------
+# (<#file>/<#tot files>): <text> <pc> <bar> <rate> | <size> <eta time> ETA
+# 8-22 1 3-4 1 6-12 1 8 3 6 1 9 1 3 1
+# end
+# ---
+# <text> | <file size> <file elapsed time>
+# 8-56 3 6 1 9 5
def _do_update_meter(self, meter, now):
self._lock.acquire()
try:
@@ -480,7 +507,7 @@ class TextMultiFileMeter(MultiFileMeter):
tf = self.numfiles or 1
pf = 100 * float(df)/tf + 0.49
dd = self.re.last_amount_read
- td = self.total_size
+ td = self.re.total
pd = 100 * (self.re.fraction_read() or 0) + 0.49
dt = self.re.elapsed_time()
rt = self.re.remaining_time()
@@ -491,9 +518,41 @@ class TextMultiFileMeter(MultiFileMeter):
ftd = format_number(td) + 'B'
fdt = format_time(dt, 1)
ftt = format_time(tt, 1)
-
- out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
- self.fo.write('\r' + out)
+
+ frac = self.re.fraction_read() or 0
+ ave_dl = format_number(self.re.average_rate())
+
+ # cycle through active meters
+ if now > self.index_time:
+ self.index_time = now + 1.0
+ self.index += 1
+ if self.index >= len(self.meters):
+ self.index = 0
+ meter = self.meters[self.index]
+ text = meter.text or meter.basename
+ if tf > 1:
+ text = '(%u/%u): %s' % (df+1+self.index, tf, text)
+
+ # Include text + ui_rate in minimal
+ tl = TerminalLine(8, 8+1+8)
+
+ ui_size = tl.add(' | %5sB' % format_number(dd))
+
+ ui_time = tl.add(' %9s' % format_time(rt))
+ ui_end = tl.add(' ETA ')
+
+ ui_sofar_pc = tl.add(' %i%%' % pf,
+ full_len=len(" (100%)"))
+ ui_rate = tl.add(' %5sB/s' % ave_dl)
+
+ # Make text grow a bit before we start growing the bar too
+ blen = 4 + tl.rest_split(8 + 8 + 4)
+ ui_bar = _term_add_bar(tl, blen, frac)
+ out = '\r%-*.*s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
+ ui_sofar_pc, ui_bar,
+ ui_rate, ui_size, ui_time,
+ ui_end)
+ self.fo.write(out)
self.fo.flush()
finally:
self._lock.release()
@@ -502,18 +561,30 @@ class TextMultiFileMeter(MultiFileMeter):
self._lock.acquire()
try:
format = "%-30.30s %6.6s %8.8s %9.9s"
- fn = meter.basename
+ fn = meter.text or meter.basename
size = meter.last_amount_read
fsize = format_number(size) + 'B'
et = meter.re.elapsed_time()
fet = format_time(et, 1)
- frate = format_number(size / et) + 'B/s'
-
- out = '%-79.79s' % (format % (fn, fsize, fet, frate))
- self.fo.write('\r' + out + '\n')
+ frate = format_number(et and size / et) + 'B/s'
+ df = self.finished_files
+ tf = self.numfiles or 1
+
+ total_time = format_time(et)
+ total_size = format_number(size)
+ text = meter.text or meter.basename
+ if tf > 1:
+ text = '(%u/%u): %s' % (df, tf, text)
+
+ tl = TerminalLine(8)
+ ui_size = tl.add(' | %5sB' % total_size)
+ ui_time = tl.add(' %9s' % total_time)
+ ui_end, not_done = _term_add_end(tl, meter.size, size)
+ out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text,
+ ui_size, ui_time, ui_end)
+ self.fo.write(out)
finally:
self._lock.release()
- self._do_update_meter(meter, now)
def _do_failure_meter(self, meter, message, now):
self._lock.acquire()
@@ -536,15 +607,6 @@ class TextMultiFileMeter(MultiFileMeter):
pass
finally:
self._lock.release()
-
- def _do_end(self, now):
- self._do_update_meter(None, now)
- self._lock.acquire()
- try:
- self.fo.write('\n')
- self.fo.flush()
- finally:
- self._lock.release()
######################################################################
# support classes and functions
@@ -658,6 +720,8 @@ def format_time(seconds, use_hours=0):
if seconds is None or seconds < 0:
if use_hours: return '--:--:--'
else: return '--:--'
+ elif seconds == float('inf'):
+ return 'Infinite'
else:
seconds = int(seconds)
minutes = seconds / 60