parent
dadfd07b88
commit
a16417493c
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
index 0000000..c37e6a8
--- /dev/null
+++ b/scripts/urlgrabber-ext-down
@@ -0,0 +1,55 @@
+#! /usr/bin/python
+#  A very simple external downloader
+
+import time, os, errno, sys
+from urlgrabber.grabber import \
+    _readlines, URLGrabberOptions, _loads, \
+    PyCurlFileObject, URLGrabError
+
+def write(fmt, *arg):
+    try: os.write(1, fmt % arg)
+    except OSError, e:
+        if e.args[0] != errno.EPIPE: raise
+        sys.exit(1)
+
+class ProxyProgress:
+    def start(self, *d1, **d2):
+        self.next_update = 0
+    def update(self, _amount_read):
+        t = time.time()
+        if t < self.next_update: return
+        self.next_update = t + 0.31
+        write('%d %d\n', self._id, _amount_read)
+
+def main():
+    import signal
+    signal.signal(signal.SIGINT, lambda n, f: sys.exit(1))
+    cnt = 0
+    while True:
+        lines = _readlines(0)
+        if not lines: break
+        for line in lines:
+            cnt += 1
+            opts = URLGrabberOptions()
+            opts._id = cnt
+            for k in line.split(' '):
+                k, v = k.split('=', 1)
+                setattr(opts, k, _loads(v))
+            if opts.progress_obj:
+                opts.progress_obj = ProxyProgress()
+                opts.progress_obj._id = cnt
+            tm = time.time()
+            try:
+                fo = PyCurlFileObject(opts.url, opts.filename, opts)
+                fo._do_grab()
+                fo.fo.close()
+                size = fo._amount_read
+                dlsz = size - fo._reget_length
+                ug_err = 'OK'
+            except URLGrabError, e:
+                size = dlsz = 0
+                ug_err = '%d %s' % e.args
+            write('%d %d %d %.3f %s\n', opts._id, size, dlsz, time.time() - tm, ug_err)
+
+if __name__ == '__main__':
+    main()
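The helper above speaks a one-line-per-message pipe protocol with its parent. Each request on stdin is a space-separated list of key=value pairs, with values encoded by the _dumps() serializer added to grabber.py below; each reply on stdout is either a progress report '<id> <bytes_read>' or a completion record '<id> <size> <downloaded> <seconds> <status>'. A hypothetical exchange (URL, path, and sizes invented for illustration) might look like:

    stdin:  url='http://example.com/a.rpm' filename='/tmp/a.rpm' progress_obj=True
    stdout: 1 4096
    stdout: 1 10240 10240 0.531 OK

On failure, the status field carries the URLGrabError code and message instead of OK, e.g. '1 0 0 0.102 14 HTTP Error 404'.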
diff --git a/setup.py b/setup.py
index d0b87b8..bfa4a18 100644
--- a/setup.py
+++ b/setup.py
@@ -15,8 +15,10 @@ url = _urlgrabber.__url__
packages = ['urlgrabber']
package_dir = {'urlgrabber':'urlgrabber'}
scripts = ['scripts/urlgrabber']
-data_files = [('share/doc/' + name + '-' + version,
-               ['README','LICENSE', 'TODO', 'ChangeLog'])]
+data_files = [
+    ('share/doc/' + name + '-' + version, ['README','LICENSE', 'TODO', 'ChangeLog']),
+    ('libexec', ['scripts/urlgrabber-ext-down']),
+]
options = { 'clean' : { 'all' : 1 } }
classifiers = [
    'Development Status :: 4 - Beta',
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 38ae1f7..094be77 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -263,6 +263,33 @@ GENERAL ARGUMENTS (kwargs)
    What type of name to IP resolving to use, default is to do both IPV4 and
    IPV6.

+  async = (key, limit)
+
+    When this option is set, the urlgrab() request is not processed
+    immediately but queued.  parallel_wait() then processes grabs in parallel,
+    limiting the number of connections in each 'key' group to at most 'limit'.
+
+  max_connections
+
+    The global connection limit.
+
+  timedhosts
+
+    The filename of the host download statistics.  If defined, urlgrabber
+    will update the stats at the end of every download.  At the end of
+    parallel_wait(), the updated stats are saved.  If synchronous grabs
+    are used, you should call th_save().
+
+  default_speed, half_life
+
+    These options only affect the async mirror selection code.
+    The default_speed option sets the speed estimate for mirrors
+    we have never downloaded from, and defaults to 1 MBps.
+
+    The speed estimate also drifts exponentially from the speed
+    actually measured to the default speed, with default
+    period of 30 days.
+

RETRY RELATED ARGUMENTS

@@ -343,6 +370,15 @@ RETRY RELATED ARGUMENTS
    but it cannot (without severe trickiness) prevent the exception
    from being raised.

+  failfunc = None
+
+    The callback that gets called when a urlgrab request fails.
+    If defined, urlgrab() calls it instead of raising URLGrabError.
+    Callback syntax is identical to failure_callback.
+
+    Contrary to failure_callback, it is called only once.  Its primary
+    purpose is to use urlgrab() without a try/except block.
+
  interrupt_callback = None

    This callback is called if KeyboardInterrupt is received at any
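Taken together, a minimal sketch of the queued-download API these options describe (hypothetical URLs and paths; assumes the module-level urlgrab() and the parallel_wait() added by this patch):

    from urlgrabber.grabber import urlgrab, parallel_wait

    def failed(obj):
        print 'download failed:', obj.url, obj.exception

    # at most 2 parallel connections in the 'example.com' group
    for name in ('a.rpm', 'b.rpm', 'c.rpm'):
        urlgrab('http://example.com/' + name, '/tmp/' + name,
                async=('example.com', 2), failfunc=failed)
    parallel_wait()

Because an async urlgrab() only queues the request and returns the filename, errors surface through failfunc (or the mirror failure callbacks) rather than as an immediate URLGrabError.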
@@ -444,7 +480,7 @@ import pycurl
from ftplib import parse150
from StringIO import StringIO
from httplib import HTTPException
-import socket
+import socket, select, fcntl
from byterange import range_tuple_normalize, range_tuple_to_header, RangeError

try:
@@ -878,6 +914,7 @@ class URLGrabberOptions:
        self.retry = None
        self.retrycodes = [-1,2,4,5,6,7]
        self.checkfunc = None
+        self.failfunc = _do_raise
        self.copy_local = 0
        self.close_connection = 0
        self.range = None
@@ -886,6 +923,7 @@ class URLGrabberOptions:
        self.keepalive = 1
        self.proxies = None
        self.libproxy = False
+        self.proxy = None
        self.reget = None
        self.failure_callback = None
        self.interrupt_callback = None
@@ -913,6 +951,12 @@ class URLGrabberOptions:
        self.size = None # if we know how big the thing we're getting is going
                         # to be. this is ultimately a MAXIMUM size for the file
        self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
+        self.async = None # blocking by default
+        self.mirror_group = None
+        self.max_connections = 5
+        self.timedhosts = None
+        self.half_life = 30*24*60*60 # 30 days
+        self.default_speed = 1e6 # 1 MB/s

    def __repr__(self):
        return self.format()
@@ -932,6 +976,17 @@ class URLGrabberOptions:
        s = s + indent + '}'
        return s

+def _do_raise(obj):
+    raise obj.exception
+
+def _run_callback(cb, obj):
+    if not cb:
+        return
+    if callable(cb):
+        return cb(obj)
+    cb, arg, karg = cb
+    return cb(obj, *arg, **karg)
+
class URLGrabber(object):
    """Provides easy opening of URLs with a variety of options.

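_run_callback() accepts either a bare callable or the (func, args, kwargs) triple that urlgrabber callbacks have traditionally used, so both spellings below are equivalent ways to attach a hypothetical checkfunc:

    def check(obj, *args, **kwargs):
        pass  # inspect obj.filename / obj.url, raise URLGrabError to reject

    g = URLGrabber(checkfunc=check)                    # called as check(obj)
    g = URLGrabber(checkfunc=(check, ('md5',), {}))    # called as check(obj, 'md5')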
@@ -977,10 +1032,9 @@ class URLGrabber(object):
            if DEBUG: DEBUG.info('exception: %s', exception)
            if callback:
                if DEBUG: DEBUG.info('calling callback: %s', callback)
-                cb_func, cb_args, cb_kwargs = self._make_callback(callback)
                obj = CallbackObject(exception=exception, url=args[0],
                                     tries=tries, retry=opts.retry)
-                cb_func(obj, *cb_args, **cb_kwargs)
+                _run_callback(callback, obj)

            if (opts.retry is None) or (tries == opts.retry):
                if DEBUG: DEBUG.info('retries exceeded, re-raising')
@@ -1043,30 +1097,36 @@ class URLGrabber(object):

        elif not opts.range:
            if not opts.checkfunc is None:
-                cb_func, cb_args, cb_kwargs = \
-                    self._make_callback(opts.checkfunc)
-                obj = CallbackObject()
-                obj.filename = path
-                obj.url = url
-                apply(cb_func, (obj, )+cb_args, cb_kwargs)
+                obj = CallbackObject(filename=path, url=url)
+                _run_callback(opts.checkfunc, obj)
            return path

+        if opts.async:
+            opts.url = url
+            opts.filename = filename
+            opts.size = int(opts.size or 0)
+            _async_queue.append(opts)
+            return filename
+
        def retryfunc(opts, url, filename):
+            tm = time.time()
            fo = PyCurlFileObject(url, filename, opts)
            try:
                fo._do_grab()
+                _TH.update(url, fo._amount_read - fo._reget_length, time.time() - tm, None)
                if not opts.checkfunc is None:
-                    cb_func, cb_args, cb_kwargs = \
-                        self._make_callback(opts.checkfunc)
-                    obj = CallbackObject()
-                    obj.filename = filename
-                    obj.url = url
-                    apply(cb_func, (obj, )+cb_args, cb_kwargs)
+                    obj = CallbackObject(filename=filename, url=url)
+                    _run_callback(opts.checkfunc, obj)
            finally:
                fo.close()
            return filename

-        return self._retry(opts, retryfunc, url, filename)
+        try:
+            return self._retry(opts, retryfunc, url, filename)
+        except URLGrabError, e:
+            _TH.update(url, 0, 0, e)
+            opts.exception = e
+            return _run_callback(opts.failfunc, opts)

    def urlread(self, url, limit=None, **kwargs):
        """read the url into a string, up to 'limit' bytes
@@ -1095,12 +1155,8 @@ class URLGrabber(object):
            else: s = fo.read(limit)

            if not opts.checkfunc is None:
-                cb_func, cb_args, cb_kwargs = \
-                    self._make_callback(opts.checkfunc)
-                obj = CallbackObject()
-                obj.data = s
-                obj.url = url
-                apply(cb_func, (obj, )+cb_args, cb_kwargs)
+                obj = CallbackObject(data=s, url=url)
+                _run_callback(opts.checkfunc, obj)
        finally:
            fo.close()
        return s
@@ -1115,6 +1171,7 @@ class URLGrabber(object):
        return s

    def _make_callback(self, callback_obj):
+        # not used, left for compatibility
        if callable(callback_obj):
            return callback_obj, (), {}
        else:
@@ -1346,14 +1403,8 @@ class PyCurlFileObject(object):
            return

        try:
-            e = None
            self.curl_obj.perform()
-        except pycurl.error, e: pass
-        self._do_perform_exc(e)
-
-    def _do_perform_exc(self, e):
-        # handle pycurl exception 'e'
-        if e:
+        except pycurl.error, e:
            # XXX - break some of these out a bit more clearly
            # to other URLGrabErrors from
            # http://curl.haxx.se/libcurl/c/libcurl-errors.html
@@ -1607,7 +1658,22 @@ class PyCurlFileObject(object):
        _was_filename = False
        if type(self.filename) in types.StringTypes and self.filename:
            _was_filename = True
-            self._do_open_fo()
+            self._prog_reportname = str(self.filename)
+            self._prog_basename = os.path.basename(self.filename)
+
+            if self.append: mode = 'ab'
+            else: mode = 'wb'
+
+            if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \
+                                 (self.filename, mode))
+            try:
+                self.fo = open(self.filename, mode)
+            except IOError, e:
+                err = URLGrabError(16, _(\
+                  'error opening local file from %s, IOError: %s') % (self.url, e))
+                err.url = self.url
+                raise err
+
        else:
            self._prog_reportname = 'MEMORY'
            self._prog_basename = 'MEMORY'
@@ -1627,7 +1693,29 @@ class PyCurlFileObject(object):
            raise e

        if _was_filename:
-            self._do_close_fo()
+            # close it up
+            self.fo.flush()
+            self.fo.close()
+
+            # Set the URL where we got it from:
+            if xattr is not None:
+                # See: http://www.freedesktop.org/wiki/CommonExtendedAttributes
+                try:
+                    xattr.set(self.filename, 'user.xdg.origin.url', self.url)
+                except:
+                    pass # URL too long. = IOError ... ignore everything.
+
+            # set the time
+            mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
+            if mod_time != -1:
+                try:
+                    os.utime(self.filename, (mod_time, mod_time))
+                except OSError, e:
+                    err = URLGrabError(16, _(\
+                      'error setting timestamp on file %s from %s, OSError: %s')
+                              % (self.filename, self.url, e))
+                    err.url = self.url
+                    raise err
            # re open it
            try:
                self.fo = open(self.filename, 'r')
@@ -1643,47 +1731,6 @@ class PyCurlFileObject(object):

        self._complete = True

-    def _do_open_fo(self):
-        self._prog_reportname = str(self.filename)
-        self._prog_basename = os.path.basename(self.filename)
-        if self.append: mode = 'ab'
-        else: mode = 'wb'
-
-        if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \
-                             (self.filename, mode))
-        try:
-            self.fo = open(self.filename, mode)
-        except IOError, e:
-            err = URLGrabError(16, _(\
-              'error opening local file from %s, IOError: %s') % (self.url, e))
-            err.url = self.url
-            raise err
-
-    def _do_close_fo(self):
-        # close it up
-        self.fo.flush()
-        self.fo.close()
-
-        # Set the URL where we got it from:
-        if xattr is not None:
-            # See: http://www.freedesktop.org/wiki/CommonExtendedAttributes
-            try:
-                xattr.set(self.filename, 'user.xdg.origin.url', self.url)
-            except:
-                pass # URL too long. = IOError ... ignore everything.
-
-        # set the time
-        mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
-        if mod_time != -1:
-            try:
-                os.utime(self.filename, (mod_time, mod_time))
-            except OSError, e:
-                err = URLGrabError(16, _(\
-                  'error setting timestamp on file %s from %s, OSError: %s')
-                          % (self.filename, self.url, e))
-                err.url = self.url
-                raise err
-
    def _fill_buffer(self, amt=None):
        """fill the buffer to contain at least 'amt' bytes by reading
        from the underlying file object.  If amt is None, then it will
@@ -1858,6 +1905,425 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,


#####################################################################
+# Serializer + parser: A replacement of the rather bulky Json code.
+#
+# - handles basic python literals, lists and tuples.
+# - serialized strings never contain ' ' or '\n'
+#
+#####################################################################
+
+_quoter_map = {}
+for c in '%[(,)] \n':
+    _quoter_map[c] = '%%%02x' % ord(c)
+del c
+
+def _dumps(v):
+    if v is None: return 'None'
+    if v is True: return 'True'
+    if v is False: return 'False'
+    if type(v) in (int, long, float):
+        return str(v)
+    if type(v) == unicode:
+        v = v.encode('UTF8')
+    if type(v) == str:
+        def quoter(c): return _quoter_map.get(c, c)
+        return "'%s'" % ''.join(map(quoter, v))
+    if type(v) == tuple:
+        return "(%s)" % ','.join(map(_dumps, v))
+    if type(v) == list:
+        return "[%s]" % ','.join(map(_dumps, v))
+    raise TypeError, 'Can\'t serialize %s' % v
+
+def _loads(s):
+    def decode(v):
+        if v == 'None': return None
+        if v == 'True': return True
+        if v == 'False': return False
+        try: return int(v)
+        except ValueError: pass
+        try: return float(v)
+        except ValueError: pass
+        if len(v) >= 2 and v[0] == v[-1] == "'":
+            ret = []; i = 1
+            while True:
+                j = v.find('%', i)
+                ret.append(v[i:j]) # skips the final "'"
+                if j == -1: break
+                ret.append(chr(int(v[j + 1:j + 3], 16)))
+                i = j + 3
+            v = ''.join(ret)
+        return v
+    stk = None
+    l = []
+    i = j = 0
+    while True:
+        if j == len(s) or s[j] in ',)]':
+            if j > i:
+                l.append(decode(s[i:j]))
+            if j == len(s): break
+            if s[j] in ')]':
+                if s[j] == ')':
+                    l = tuple(l)
+                stk[0].append(l)
+                l, stk = stk
+            i = j = j + 1
+        elif s[j] in '[(':
+            stk = l, stk
+            l = []
+            i = j = j + 1
+        else:
+            j += 1 # safe because '[(,)]' are quoted
+    if stk: raise ValueError
+    if len(l) == 1: l = l[0]
+    return l
+
+
+#####################################################################
+# External downloader process
+#####################################################################
+
+def _readlines(fd):
+    buf = os.read(fd, 4096)
+    if not buf: return None
+    # whole lines only, no buffering
+    while buf[-1] != '\n':
+        buf += os.read(fd, 4096)
+    return buf[:-1].split('\n')
+
+import subprocess
+
+class _ExternalDownloader:
+    def __init__(self):
+        self.popen = subprocess.Popen(
+            '/usr/libexec/urlgrabber-ext-down',
+            stdin = subprocess.PIPE,
+            stdout = subprocess.PIPE,
+        )
+        self.stdin  = self.popen.stdin.fileno()
+        self.stdout = self.popen.stdout.fileno()
+        self.running = {}
+        self.cnt = 0
+
+    # list of options we pass to downloader
+    _options = (
+        'url', 'filename',
+        'timeout', 'close_connection', 'keepalive',
+        'throttle', 'bandwidth', 'range', 'reget',
+        'user_agent', 'http_headers', 'ftp_headers',
+        'proxy', 'prefix', 'username', 'password',
+        'ssl_ca_cert',
+        'ssl_cert', 'ssl_cert_type',
+        'ssl_key', 'ssl_key_type',
+        'ssl_key_pass',
+        'ssl_verify_peer', 'ssl_verify_host',
+        'size', 'max_header_size', 'ip_resolve',
+    )
+
+    def start(self, opts):
+        arg = []
+        for k in self._options:
+            v = getattr(opts, k)
+            if v is None: continue
+            arg.append('%s=%s' % (k, _dumps(v)))
+        if opts.progress_obj:
+            arg.append('progress_obj=True')
+        arg = ' '.join(arg)
+        if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
+
+        self.cnt += 1
+        self.running[self.cnt] = opts
+        os.write(self.stdin, arg +'\n')
+
+    def perform(self):
+        ret = []
+        lines = _readlines(self.stdout)
+        if not lines:
+            if DEBUG: DEBUG.info('downloader died')
+            raise KeyboardInterrupt
+        for line in lines:
+            # parse downloader output
+            line = line.split(' ', 5)
+            _id, size = map(int, line[:2])
+            if len(line) == 2:
+                opts = self.running[_id]
+                m = opts.progress_obj
+                if m:
+                    if not m.last_update_time:
+                        m.start(text = opts.text)
+                    m.update(size)
+                continue
+            # job done
+            opts = self.running.pop(_id)
+            if line[4] == 'OK':
+                ug_err = None
+                if DEBUG: DEBUG.info('success')
+            else:
+                ug_err = URLGrabError(int(line[4]), line[5])
+                if DEBUG: DEBUG.info('failure: %s', ug_err)
+            _TH.update(opts.url, int(line[2]), float(line[3]), ug_err)
+            ret.append((opts, size, ug_err))
+        return ret
+
+    def abort(self):
+        self.popen.stdin.close()
+        self.popen.stdout.close()
+        self.popen.wait()
+
+class _ExternalDownloaderPool:
+    def __init__(self):
+        self.epoll = select.epoll()
+        self.running = {}
+        self.cache = {}
+
+    def start(self, opts):
+        host = urlparse.urlsplit(opts.url).netloc
+        dl = self.cache.pop(host, None)
+        if not dl:
+            dl = _ExternalDownloader()
+            fl = fcntl.fcntl(dl.stdin, fcntl.F_GETFD)
+            fcntl.fcntl(dl.stdin, fcntl.F_SETFD, fl | fcntl.FD_CLOEXEC)
+        self.epoll.register(dl.stdout, select.EPOLLIN)
+        self.running[dl.stdout] = dl
+        dl.start(opts)
+
+    def perform(self):
+        ret = []
+        for fd, event in self.epoll.poll():
+            assert event & select.EPOLLIN
+            done = self.running[fd].perform()
+            if not done: continue
+            assert len(done) == 1
+            ret.extend(done)
+
+            # dl finished, move it to the cache
+            host = urlparse.urlsplit(done[0][0].url).netloc
+            if host in self.cache: self.cache[host].abort()
+            self.epoll.unregister(fd)
+            self.cache[host] = self.running.pop(fd)
+        return ret
+
+    def abort(self):
+        for dl in self.running.values():
+            self.epoll.unregister(dl.stdout)
+            dl.abort()
+        for dl in self.cache.values():
+            dl.abort()
+
+
+#####################################################################
+# High level async API
+#####################################################################
+
+_async_queue = []
+
+def parallel_wait(meter = 'text'):
+    '''Process queued requests in parallel.
+    '''
+
+    if meter:
+        count = total = 0
+        for opts in _async_queue:
+            count += 1
+            total += opts.size
+        if meter == 'text':
+            from progress import TextMultiFileMeter
+            meter = TextMultiFileMeter()
+        meter.start(count, total)
+
+    dl = _ExternalDownloaderPool()
+    host_con = {} # current host connection counts
+
+    def start(opts, tries):
+        key, limit = opts.async
+        host_con[key] = host_con.get(key, 0) + 1
+        opts.tries = tries
+        opts.progress_obj = meter and meter.newMeter()
+        if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
+        dl.start(opts)
+
+    def perform():
+        for opts, size, ug_err in dl.perform():
+            key, limit = opts.async
+            host_con[key] -= 1
+            if meter:
+                m = opts.progress_obj
+                m.basename = os.path.basename(opts.filename)
+                if ug_err:
+                    m.failure(ug_err.args[1])
+                else:
+                    # file size might have changed
+                    meter.re.total += size - opts.size
+                    m.end(size)
+                meter.removeMeter(m)
+
+            if ug_err is None:
+                if opts.checkfunc:
+                    try: _run_callback(opts.checkfunc, opts)
+                    except URLGrabError, ug_err: pass
+                if ug_err is None:
+                    continue
+
+            retry = opts.retry or 0
+            if opts.failure_callback:
+                opts.exception = ug_err
+                try: _run_callback(opts.failure_callback, opts)
+                except URLGrabError, ug_err:
+                    retry = 0 # no retries
+            if opts.tries < retry and ug_err.args[0] in opts.retrycodes:
+                start(opts, opts.tries + 1) # simple retry
+                continue
+
+            if opts.mirror_group:
+                mg, failed = opts.mirror_group
+                opts.mirror = key
+                opts.exception = ug_err
+                action = _run_callback(mg.failure_callback, opts)
+                if not (action and action.get('fail')):
+                    # mask this mirror and retry
+                    failed.add(key)
+                    _async_queue.append(opts)
+                    continue
+
+            # urlgrab failed
+            opts.exception = ug_err
+            _run_callback(opts.failfunc, opts)
+
+    try:
+        idx = 0
+        while True:
+            if idx >= len(_async_queue):
+                # the queue is empty
+                if not dl.running: break
+                # pending dl may extend it
+                perform()
+                continue
+
+            # handle next request
+            opts = _async_queue[idx]
+            idx += 1
+
+            # check global limit
+            while len(dl.running) >= opts.max_connections:
+                perform()
+
+            if opts.mirror_group:
+                mg, failed = opts.mirror_group
+
+                # find the best mirror
+                best = None
+                for mirror in mg.mirrors:
+                    key = mirror['mirror']
+                    if key in failed: continue
+
+                    # estimate mirror speed
+                    speed = _TH.estimate(key)
+                    speed /= 1 + host_con.get(key, 0)
+                    if best is None or speed > best_speed:
+                        best = mirror
+                        best_speed = speed
+
+                if best is None:
+                    opts.exception = URLGrabError(256, _('No more mirrors to try.'))
+                    _run_callback(opts.failfunc, opts)
+                    continue
+
+                # update the current mirror and limit
+                key = best['mirror']
+                limit = best.get('kwargs', {}).get('max_connections', 3)
+                opts.async = key, limit
+
+                # update URL and proxy
+                url = mg._join_url(key, opts.relative_url)
+                url, parts = opts.urlparser.parse(url, opts)
+                opts.find_proxy(url, parts[0])
+                opts.url = url
+
+            # check host limit, then start
+            key, limit = opts.async
+            while host_con.get(key, 0) >= limit:
+                perform()
+            start(opts, 1)
+    except IOError, e:
+        if e.errno != 4: raise
+        raise KeyboardInterrupt
+
+    finally:
+        dl.abort()
+        if meter: meter.end()
+        del _async_queue[:]
+        _TH.save()
+
+
+#####################################################################
+# Host bandwidth estimation
+#####################################################################
+
+class _TH:
+    hosts = {}
+    dirty = None
+
+    @staticmethod
+    def load():
+        filename = default_grabber.opts.timedhosts
+        if filename and _TH.dirty is None:
+            try:
+                for line in open(filename):
+                    host, speed, fail, ts = line.split()
+                    _TH.hosts[host] = int(speed), int(fail), int(ts)
+            except IOError: pass
+            _TH.dirty = False
+
+    @staticmethod
+    def save():
+        filename = default_grabber.opts.timedhosts
+        if filename and _TH.dirty is True:
+            tmp = '%s.%d' % (filename, os.getpid())
+            try:
+                f = open(tmp, 'w')
+                for host in _TH.hosts:
+                    f.write(host + ' %d %d %d\n' % _TH.hosts[host])
+                f.close()
+                os.rename(tmp, filename)
+            except IOError: pass
+            _TH.dirty = False
+
+    @staticmethod
+    def update(url, dl_size, dl_time, ug_err):
+        _TH.load()
+        host = urlparse.urlsplit(url).netloc
+        speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
+        now = time.time()
+
+        if ug_err is None:
+            # k1: the older, the less useful
+            # k2: if it was <1MiB, don't trust it much
+            # speeds vary, use 10:1 smoothing
+            k1 = 2**((ts - now) / default_grabber.opts.half_life)
+            k2 = min(dl_size / 1e6, 1.0) / 10
+            speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2)
+            fail = 0
+        elif getattr(ug_err, 'code', None) == 404:
+            fail = 0 # alive, at least
+        else:
+            fail += 1 # seems dead
+
+        _TH.hosts[host] = speed, fail, now
+        _TH.dirty = True
+
+    @staticmethod
+    def estimate(url):
+        _TH.load()
+        host = urlparse.urlsplit(url).netloc
+        default_speed = default_grabber.opts.default_speed
+        try: speed, fail, ts = _TH.hosts[host]
+        except KeyError: return default_speed
+
+        speed *= 2**-fail
+        k = 2**((ts - time.time()) / default_grabber.opts.half_life)
+        speed = k * speed + (1 - k) * default_speed
+        return speed
+
+#####################################################################
# TESTING
def _main_test():
    try: url, filename = sys.argv[1:3]
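To make the decay arithmetic in _TH concrete: with the default 30-day half_life, a host measured at 5 MB/s fifteen days ago gives k = 2**(-15/30) ≈ 0.71 in estimate(), so the returned speed drifts to roughly 0.71 * 5e6 + 0.29 * 1e6 ≈ 3.8 MB/s. Each recorded consecutive failure additionally halves the estimate via speed *= 2**-fail, so a flaky mirror quickly loses the selection race in parallel_wait().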
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index 8731aed..d699b61 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -76,6 +76,9 @@ CUSTOMIZATION
       'grabber' is omitted, the default grabber will be used.  If
       kwargs are omitted, then (duh) they will not be used.

+      kwarg 'max_connections' is used to store the max connection
+      limit of this mirror.
+
    3) Pass keyword arguments when instantiating the mirror group.
       See, for example, the failure_callback argument.

@@ -91,6 +94,7 @@ import random
import thread  # needed for locking to make this threadsafe

from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
+from grabber import _run_callback, _do_raise, _async_queue

def _(st):
    return st
@@ -254,7 +258,7 @@ class MirrorGroup:
    # if these values are found in **kwargs passed to one of the urlXXX
    # methods, they will be stripped before getting passed on to the
    # grabber
-    options = ['default_action', 'failure_callback']
+    options = ['default_action', 'failure_callback', 'failfunc']

    def _process_kwargs(self, kwargs):
        self.failure_callback = kwargs.get('failure_callback')
@@ -403,10 +407,25 @@ class MirrorGroup:
            self._failure(gr, obj)

    def urlgrab(self, url, filename=None, **kwargs):
+        if kwargs.get('async'):
+            opts = self.grabber.opts.derive(**kwargs)
+            opts.mirror_group = self, set()
+            opts.relative_url = _to_utf8(url)
+
+            opts.url = 'http://tbd'
+            opts.filename = filename
+            opts.size = int(opts.size or 0)
+            _async_queue.append(opts)
+            return filename
+
        kw = dict(kwargs)
        kw['filename'] = filename
        func = 'urlgrab'
-        return self._mirror_try(func, url, kw)
+        try:
+            return self._mirror_try(func, url, kw)
+        except URLGrabError, e:
+            obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs)
+            return _run_callback(kwargs.get('failfunc', _do_raise), obj)

    def urlopen(self, url, **kwargs):
        kw = dict(kwargs)
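The async path above can be exercised with a sketch like this (mirror URLs invented; the 'http://tbd' placeholder is only replaced once parallel_wait() selects a mirror):

    from urlgrabber.grabber import URLGrabber, parallel_wait
    from urlgrabber.mirror import MirrorGroup

    mg = MirrorGroup(URLGrabber(), ['http://m1.example.com/repo/',
                                    'http://m2.example.com/repo/'])
    mg.urlgrab('pkg.rpm', '/tmp/pkg.rpm', async=('repo', 2))
    parallel_wait()

parallel_wait() picks the mirror with the best _TH speed estimate (scaled down by the connections it already has open), masks mirrors that fail, and finally falls back to failfunc with error 256 ('No more mirrors to try.') when the mirror set is exhausted.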
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
index 3d7e99a..4c126c5 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -576,7 +576,6 @@ class TextMultiFileMeter(MultiFileMeter):
            self.fo.write(out)
        finally:
            self._lock.release()
-        self._do_update_meter(meter, now)

    def _do_failure_meter(self, meter, message, now):
        self._lock.acquire()
@@ -599,15 +598,6 @@ class TextMultiFileMeter(MultiFileMeter):
            pass
        finally:
            self._lock.release()
-
-    def _do_end(self, now):
-        self._do_update_meter(None, now)
-        self._lock.acquire()
-        try:
-            self.fo.write('\n')
-            self.fo.flush()
-        finally:
-            self._lock.release()

######################################################################
# support classes and functions