diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index d48abe1..f026a56 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,8 +3,9 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.10 -Release: 0%{?dist} +Release: 1%{?dist} Source0: http://urlgrabber.baseurl.org/download/urlgrabber-%{version}.tar.gz +Patch1: urlgrabber-HEAD.patch License: LGPLv2+ Group: Development/Libraries @@ -22,6 +23,7 @@ authentication, proxies and more. %prep %setup -q -n urlgrabber-%{version} +%patch1 -p1 %build python setup.py build @@ -42,6 +44,10 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Mon Dec 9 2013 Zdenek Pavlas - 3.10-1 +- Process mirror retries before other queued requests. +- Tell curl to return immediately on ctrl-c. BZ 1017491 + * Wed Oct 9 2013 Zdenek Pavlas - 3.10-0 - Update to latest HEAD. - clamp timestamps from the future. BZ 894630, 1013733 diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch new file mode 100644 index 0000000..e2203c4 --- /dev/null +++ b/urlgrabber-HEAD.patch @@ -0,0 +1,71 @@ +diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py +index b004f4d..d19ca01 100644 +--- a/urlgrabber/grabber.py ++++ b/urlgrabber/grabber.py +@@ -1893,6 +1893,8 @@ class PyCurlFileObject(object): + urllib.addinfourl, via. urllib.URLopener.* """ + return self.url + ++# tell curl to return immediately on ctrl-c ++pycurl.global_init(pycurl.GLOBAL_DEFAULT | pycurl.GLOBAL_ACK_EINTR) + _curl_cache = pycurl.Curl() # make one and reuse it over and over and over + + def reset_curl_obj(): +@@ -2170,6 +2172,7 @@ def parallel_wait(meter=None): + dl = _ExternalDownloaderPool() + host_con = {} # current host connection counts + single = set() # hosts in single connection mode ++ retry_queue = [] + + def start(opts, tries): + opts.tries = tries +@@ -2221,6 +2224,9 @@ def parallel_wait(meter=None): + # Turn on the max_connections=1 override. BZ 853432 + if DEBUG: DEBUG.info('max_connections(%s) %s => 1', key, limit) + single.add(key) ++ # When using multi-downloader the parent's _curl_cache ++ # object is idle. Kill it, as it might use keepalive=1. ++ reset_curl_obj() + + retry = opts.retry or 0 + if opts.failure_callback: +@@ -2247,7 +2253,7 @@ def parallel_wait(meter=None): + # mask this mirror and retry + if action.get('remove', 1): + removed.add(key) +- _async_queue.append(opts) ++ retry_queue.append(opts) + continue + # fail=1 from callback + ug_err.errors = errors +@@ -2257,19 +2263,22 @@ def parallel_wait(meter=None): + _run_callback(opts.failfunc, opts) + + try: +- idx = 0 ++ retry_idx = idx = 0 + while True: +- if idx >= len(_async_queue): +- # the queue is empty ++ if retry_idx < len(retry_queue): ++ # retries first ++ opts = retry_queue[retry_idx] ++ retry_idx += 1 ++ elif idx < len(_async_queue): ++ # handle next request ++ opts = _async_queue[idx] ++ idx += 1 ++ else: ++ # both queues are empty + if not dl.running: break +- # pending dl may extend it + perform() + continue + +- # handle next request +- opts = _async_queue[idx] +- idx += 1 +- + # check global limit + while len(dl.running) >= default_grabber.opts.max_connections: + perform()