diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index b004f4d..d19ca01 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -1893,6 +1893,8 @@ class PyCurlFileObject(object):
             urllib.addinfourl, via. urllib.URLopener.* """
         return self.url
 
+# tell curl to return immediately on ctrl-c
+pycurl.global_init(pycurl.GLOBAL_DEFAULT | pycurl.GLOBAL_ACK_EINTR)
 _curl_cache = pycurl.Curl() # make one and reuse it over and over and over
 
 def reset_curl_obj():
@@ -2170,6 +2172,7 @@ def parallel_wait(meter=None):
     dl = _ExternalDownloaderPool()
     host_con = {} # current host connection counts
     single = set() # hosts in single connection mode
+    retry_queue = []
 
     def start(opts, tries):
         opts.tries = tries
@@ -2221,6 +2224,9 @@ def parallel_wait(meter=None):
                 # Turn on the max_connections=1 override. BZ 853432
                 if DEBUG: DEBUG.info('max_connections(%s) %s => 1', key, limit)
                 single.add(key)
+                # When using multi-downloader the parent's _curl_cache
+                # object is idle. Kill it, as it might use keepalive=1.
+                reset_curl_obj()
 
             retry = opts.retry or 0
             if opts.failure_callback:
@@ -2247,7 +2253,7 @@ def parallel_wait(meter=None):
                     # mask this mirror and retry
                     if action.get('remove', 1):
                         removed.add(key)
-                    _async_queue.append(opts)
+                    retry_queue.append(opts)
                     continue
                 # fail=1 from callback
                 ug_err.errors = errors
@@ -2257,19 +2263,22 @@ def parallel_wait(meter=None):
             _run_callback(opts.failfunc, opts)
 
     try:
-        idx = 0
+        retry_idx = idx = 0
         while True:
-            if idx >= len(_async_queue):
-                # the queue is empty
+            if retry_idx < len(retry_queue):
+                # retries first
+                opts = retry_queue[retry_idx]
+                retry_idx += 1
+            elif idx < len(_async_queue):
+                # handle next request
+                opts = _async_queue[idx]
+                idx += 1
+            else:
+                # both queues are empty
                 if not dl.running: break
-                # pending dl may extend it
                 perform()
                 continue
 
-            # handle next request
-            opts = _async_queue[idx]
-            idx += 1
-
             # check global limit
             while len(dl.running) >= default_grabber.opts.max_connections:
                 perform()
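
Below is a minimal, standalone sketch (not urlgrabber code) of the scheduling change introduced by the last two hunks: retried requests get their own queue and are drained before any new requests, and the loop only blocks on in-flight work when both queues are empty. The names main_queue, retry_queue, running, submit and wait_one are placeholders invented for this illustration.

# Hypothetical illustration of the retry-first scheduling loop from the
# patch above; none of these names come from urlgrabber itself.
def schedule(main_queue, retry_queue, running, submit, wait_one):
    retry_idx = idx = 0
    while True:
        if retry_idx < len(retry_queue):
            # retries first
            req = retry_queue[retry_idx]
            retry_idx += 1
        elif idx < len(main_queue):
            # then new requests, in submission order
            req = main_queue[idx]
            idx += 1
        else:
            # both queues are empty: stop when nothing is in flight,
            # otherwise wait for one download to finish (a failure may
            # append to retry_queue and re-enter the first branch)
            if not running:
                break
            wait_one()
            continue
        submit(req)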