Update to latest HEAD.

- Fix parsing of FTP 213 responses
- Switch to max_connections=1 after timing out. BZ 853432
- max_connections=0 should imply the default limit.
epel9
Zdenek Pavlas 12 years ago
parent 4b9511117b
commit d2b26353b7

@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
Release: 28%{?dist}
Release: 29%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,12 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
* Tue Jun 18 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-29
- Update to latest HEAD.
- Fix parsing of FTP 213 responses
- Switch to max_connections=1 after timing out. BZ 853432
- max_connections=0 should imply the default limit.
* Fri May 17 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-28
- Update to latest HEAD.
- Add the "minrate" option. BZ 964298

@@ -314,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index e090e90..05ea9c3 100644
index e090e90..6b409e3 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -920,7 +920,7 @@ index e090e90..05ea9c3 100644
if not self._prog_running:
if self.opts.progress_obj:
size = self.size + self._reget_length
@@ -1079,23 +1274,40 @@ class PyCurlFileObject():
@@ -1079,32 +1274,62 @@ class PyCurlFileObject():
self.opts.progress_obj.update(self._amount_read)
self._amount_read += len(buf)
@@ -967,7 +967,10 @@ index e090e90..05ea9c3 100644
elif self.scheme in ['ftp']:
s = None
if buf.startswith('213 '):
@@ -1104,7 +1316,18 @@ class PyCurlFileObject():
s = buf[3:].strip()
+ if len(s) >= 14:
+ s = None # ignore MDTM responses
elif buf.startswith('150 '):
s = parse150(buf)
if s:
self.size = int(s)
@@ -987,7 +990,7 @@ index e090e90..05ea9c3 100644
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
@@ -1113,8 +1336,10 @@ class PyCurlFileObject():
@@ -1113,8 +1338,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -998,7 +1001,7 @@ index e090e90..05ea9c3 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
@@ -1127,6 +1352,9 @@ class PyCurlFileObject():
@@ -1127,6 +1354,9 @@ class PyCurlFileObject():
if not opts:
opts = self.opts
@@ -1008,7 +1011,7 @@ index e090e90..05ea9c3 100644
# defaults we're always going to set
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
@@ -1136,11 +1364,21 @@ class PyCurlFileObject():
@@ -1136,11 +1366,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -1031,7 +1034,7 @@ index e090e90..05ea9c3 100644
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
@@ -1148,9 +1386,11 @@ class PyCurlFileObject():
@@ -1148,9 +1388,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
@@ -1046,7 +1049,7 @@ index e090e90..05ea9c3 100644
# ssl options
if self.scheme == 'https':
@@ -1158,13 +1398,16 @@ class PyCurlFileObject():
@@ -1158,13 +1400,16 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@@ -1064,7 +1067,7 @@ index e090e90..05ea9c3 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
@@ -1187,28 +1430,26 @@ class PyCurlFileObject():
@@ -1187,28 +1432,26 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -1109,7 +1112,7 @@ index e090e90..05ea9c3 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
@@ -1228,39 +1469,26 @@ class PyCurlFileObject():
@@ -1228,39 +1471,26 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -1155,7 +1158,7 @@ index e090e90..05ea9c3 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
@@ -1269,36 +1497,70 @@ class PyCurlFileObject():
@@ -1269,36 +1499,70 @@ class PyCurlFileObject():
# figure out what aborted the pycurl process FIXME
raise KeyboardInterrupt
@@ -1251,7 +1254,7 @@ index e090e90..05ea9c3 100644
def _do_open(self):
self.curl_obj = _curl_cache
@@ -1333,7 +1595,11 @@ class PyCurlFileObject():
@@ -1333,7 +1597,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
@@ -1264,7 +1267,7 @@ index e090e90..05ea9c3 100644
if rt:
header = range_tuple_to_header(rt)
@@ -1434,21 +1700,46 @@ class PyCurlFileObject():
@@ -1434,21 +1702,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@@ -1318,7 +1321,7 @@ index e090e90..05ea9c3 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
@@ -1526,17 +1817,20 @@ class PyCurlFileObject():
@@ -1526,17 +1819,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1344,7 +1347,7 @@ index e090e90..05ea9c3 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
@@ -1544,13 +1838,6 @@ class PyCurlFileObject():
@@ -1544,13 +1840,6 @@ class PyCurlFileObject():
return True
return False
@@ -1358,7 +1361,7 @@ index e090e90..05ea9c3 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
@@ -1582,9 +1869,21 @@ class PyCurlFileObject():
@@ -1582,9 +1871,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1381,7 +1384,7 @@ index e090e90..05ea9c3 100644
#####################################################################
# DEPRECATED FUNCTIONS
@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
@@ -1621,6 +1922,489 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1614,6 +1617,7 @@ index e090e90..05ea9c3 100644
+
+ dl = _ExternalDownloaderPool()
+ host_con = {} # current host connection counts
+ single = set() # hosts in single connection mode
+
+ def start(opts, tries):
+ opts.tries = tries
@@ -1660,6 +1664,10 @@ index e090e90..05ea9c3 100644
+
+ if ug_err is None:
+ continue
+ if ug_err.errno == pycurl.E_OPERATION_TIMEOUTED:
+ # One possible cause is connection-limited server.
+ # Turn on the max_connections=1 override. BZ 853432
+ single.add(key)
+
+ retry = opts.retry or 0
+ if opts.failure_callback:
@@ -1749,7 +1757,7 @@ index e090e90..05ea9c3 100644
+
+ # update the current mirror and limit
+ key = best['mirror']
+ limit = best.get('kwargs', {}).get('max_connections', 2)
+ limit = best.get('kwargs', {}).get('max_connections') or 2
+ opts.async = key, limit
+
+ # update URL and proxy
@@ -1760,6 +1768,8 @@ index e090e90..05ea9c3 100644
+
+ # check host limit, then start
+ key, limit = opts.async
+ if key in single:
+ limit = 1
+ while host_con.get(key, 0) >= limit:
+ perform()
+ if DEBUG:
@@ -1865,20 +1875,21 @@ index e090e90..05ea9c3 100644
def _main_test():
try: url, filename = sys.argv[1:3]
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index dad410b..7975f1b 100644
index dad410b..5d3aa34 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -76,6 +76,9 @@ CUSTOMIZATION
@@ -76,6 +76,10 @@ CUSTOMIZATION
'grabber' is omitted, the default grabber will be used. If
kwargs are omitted, then (duh) they will not be used.
+ kwarg 'max_connections' limits the number of concurrent
+ connections to this mirror.
+ connections to this mirror. When omitted or set to zero,
+ the default limit (2) will be used.
+
3) Pass keyword arguments when instantiating the mirror group.
See, for example, the failure_callback argument.
@@ -87,10 +90,14 @@ CUSTOMIZATION
@@ -87,10 +91,14 @@ CUSTOMIZATION
"""
@@ -1894,7 +1905,7 @@ index dad410b..7975f1b 100644
def _(st):
return st
@@ -126,7 +133,9 @@ class MirrorGroup:
@@ -126,7 +134,9 @@ class MirrorGroup:
files)
* if the local list is ever exhausted, a URLGrabError will be
@@ -1905,7 +1916,7 @@ index dad410b..7975f1b 100644
OPTIONS
@@ -153,7 +162,8 @@ class MirrorGroup:
@@ -153,7 +163,8 @@ class MirrorGroup:
The 'fail' option will cause immediate failure by re-raising
the exception and no further attempts to get the current
@@ -1915,7 +1926,7 @@ index dad410b..7975f1b 100644
This dict can be set at instantiation time,
mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
@@ -184,6 +194,7 @@ class MirrorGroup:
@@ -184,6 +195,7 @@ class MirrorGroup:
obj.exception = < exception that was raised >
obj.mirror = < the mirror that was tried >
@@ -1923,7 +1934,7 @@ index dad410b..7975f1b 100644
obj.relative_url = < url relative to the mirror >
obj.url = < full url that failed >
# .url is just the combination of .mirror
@@ -251,6 +262,17 @@ class MirrorGroup:
@@ -251,6 +263,17 @@ class MirrorGroup:
self.default_action = None
self._process_kwargs(kwargs)
@@ -1941,7 +1952,7 @@ index dad410b..7975f1b 100644
# if these values are found in **kwargs passed to one of the urlXXX
# methods, they will be stripped before getting passed on to the
# grabber
@@ -263,7 +285,8 @@ class MirrorGroup:
@@ -263,7 +286,8 @@ class MirrorGroup:
def _parse_mirrors(self, mirrors):
parsed_mirrors = []
for m in mirrors:
@@ -1951,7 +1962,7 @@ index dad410b..7975f1b 100644
parsed_mirrors.append(m)
return parsed_mirrors
@@ -280,7 +303,9 @@ class MirrorGroup:
@@ -280,7 +304,9 @@ class MirrorGroup:
# return a random mirror so that multiple mirrors get used
# even without failures.
if not gr.mirrors:
@@ -1962,7 +1973,7 @@ index dad410b..7975f1b 100644
return gr.mirrors[gr._next]
def _failure(self, gr, cb_obj):
@@ -307,7 +332,9 @@ class MirrorGroup:
@@ -307,7 +333,9 @@ class MirrorGroup:
a.update(action)
action = a
self.increment_mirror(gr, action)
@@ -1973,7 +1984,7 @@ index dad410b..7975f1b 100644
def increment_mirror(self, gr, action={}):
"""Tell the mirror object increment the mirror index
@@ -377,35 +404,50 @@ class MirrorGroup:
@@ -377,35 +405,50 @@ class MirrorGroup:
gr.url = url
gr.kw = dict(kw)
self._load_gr(gr)

Loading…
Cancel
Save