- Update to latest HEAD.

- fix some test cases that were failing.  BZ 918658
- exit(1) on /bin/urlgrabber failures.  BZ 918613
- clamp timestamps from the future.  BZ 894630 (sketched below)
- enable GSSNEGOTIATE when curl implements it correctly (>= 7.28.0).
- make error messages more verbose.
Branch: epel9
Author: Zdenek Pavlas
parent cb2d39c735
commit c508ad399b
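The timestamp clamp (BZ 894630) is small enough to show standalone. A minimal sketch of the idea, using a hypothetical clamp_timestamp() helper rather than the real _TH.update() in grabber.py:

import time

def clamp_timestamp(ts, now=None):
    # A timestamp recorded "in the future" (e.g. after the system
    # clock stepped backwards) would distort download-speed estimates,
    # so never let ts get ahead of the current time.
    if now is None:
        now = time.time()
    if ts > now:
        ts = now
    return ts

print(clamp_timestamp(time.time() + 3600))  # clamped to "now"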

@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 24%{?dist}
+Release: 25%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,14 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
* Thu Mar 7 2013 Zdeněk Pavlas <zpavlas@redhat.com> - 3.9.1-25
- Update to latest HEAD.
- fix some test cases that were failing. BZ 918658
- exit(1) on /bin/urlgrabber failures. BZ 918613
- clamp timestamps from the future. BZ 894630
- enable GSSNEGOTIATE when curl implements it correctly (>= 7.28.0).
- make error messages more verbose.
* Thu Feb 14 2013 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 3.9.1-24
- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild

@@ -12,7 +12,7 @@ index 0000000..1ffe416
+*.kateproject
+ipython.log*
diff --git a/scripts/urlgrabber b/scripts/urlgrabber
-index 518e512..09cd896 100644
+index 518e512..07881b3 100644
--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
@@ -115,6 +115,7 @@ options:
@@ -71,6 +71,14 @@ index 518e512..09cd896 100644
def help_doc(self):
print __doc__
@@ -294,6 +301,7 @@ class ugclient:
if self.op.localfile: print f
except URLGrabError, e:
print e
+ sys.exit(1)
def set_debug_logger(self, dbspec):
try:
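The sys.exit(1) added above is the whole of the BZ 918613 fix: the CLI used to print the URLGrabError and still exit 0, so callers could not detect failure. A minimal sketch of the pattern, with a hypothetical fetch() standing in for URLGrabber.urlgrab():

import sys

class URLGrabError(Exception):
    pass

def fetch(url):
    # stand-in for the real grab; always fails for the demo
    raise URLGrabError(14, 'HTTP Error 404 : %s' % url)

try:
    fetch('http://example.com/missing')
except URLGrabError as e:
    print(e)
    sys.exit(1)  # non-zero status so scripts can see the failure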
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
index 0000000..3dafb12
@@ -181,6 +189,37 @@ index 50c6348..5fb43f9 100644
base_ftp = 'ftp://localhost/test/'
# set to a proftp server only. we're working around a couple of
diff --git a/test/test_mirror.py b/test/test_mirror.py
index 70fe069..cb63a41 100644
--- a/test/test_mirror.py
+++ b/test/test_mirror.py
@@ -28,7 +28,7 @@ import os
import string, tempfile, random, cStringIO, os
import urlgrabber.grabber
-from urlgrabber.grabber import URLGrabber, URLGrabError
+from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
import urlgrabber.mirror
from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
@@ -106,6 +106,9 @@ class CallbackTests(TestCase):
self.g = URLGrabber()
fullmirrors = [base_mirror_url + m + '/' for m in \
(bad_mirrors + good_mirrors)]
+ if hasattr(urlgrabber.grabber, '_TH'):
+ # test assumes mirrors are not re-ordered
+ urlgrabber.grabber._TH.hosts.clear()
self.mg = MirrorGroup(self.g, fullmirrors)
def test_failure_callback(self):
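The _TH.hosts.clear() call above exists because the new grabber code keeps a module-level cache of per-host timing data, and MirrorGroup can use it to re-order mirrors; stale entries from an earlier test would break the assumption that mirrors are tried in list order. A sketch of that isolation pattern, with a hypothetical _TH stand-in:

import unittest

class _TH:
    # hypothetical stand-in for urlgrabber.grabber._TH: a module-level
    # cache mapping host -> timing/speed records
    hosts = {}

class MirrorOrderTests(unittest.TestCase):
    def setUp(self):
        # the tests assume mirrors are not re-ordered, so drop any
        # recorded host speeds before each test
        _TH.hosts.clear()

    def test_cache_starts_empty(self):
        self.assertEqual(_TH.hosts, {})

if __name__ == '__main__':
    unittest.main()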
@@ -168,6 +171,7 @@ class FakeGrabber:
self.resultlist = resultlist or []
self.index = 0
self.calls = []
+ self.opts = URLGrabberOptions()
def urlgrab(self, url, filename=None, **kwargs):
self.calls.append( (url, filename) )
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
@@ -236,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..6ce9861 100644
+index e090e90..1afb2c5 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -958,7 +997,7 @@ index e090e90..6ce9861 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
-@@ -1187,28 +1414,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -982,9 +1021,11 @@ index e090e90..6ce9861 100644
+ # proxy
+ if opts.proxy is not None:
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
+ self.curl_obj.setopt(pycurl.PROXYAUTH,
+ # All but Kerberos. BZ 769254
+ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
+ auth = pycurl.HTTPAUTH_ANY
+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
+ # BZ 769254: work around a bug in curl < 7.28.0
+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
+ self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
+
+ if opts.username and opts.password:
+ if self.scheme in ('http', 'https'):
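pycurl.version_info()[2] is the libcurl version packed as 0xXXYYZZ, so (7 << 16 | 28 << 8 | 0) is 7.28.0, the first libcurl where proxy GSSNEGOTIATE works per the BZ 769254 workaround above. A standalone sketch of the comparison; the auth-bit values here are illustrative, not pycurl's real constants:

def pack(major, minor, patch):
    # libcurl packs its version number as 0xXXYYZZ
    return major << 16 | minor << 8 | patch

HTTPAUTH_ANY = ~0             # illustrative values only
HTTPAUTH_GSSNEGOTIATE = 1 << 2

def proxyauth_bits(curl_version):
    auth = HTTPAUTH_ANY
    if curl_version < pack(7, 28, 0):
        # BZ 769254: GSSNEGOTIATE via proxy is broken before 7.28.0
        auth &= ~HTTPAUTH_GSSNEGOTIATE
    return auth

assert (proxyauth_bits(pack(7, 27, 0)) & HTTPAUTH_GSSNEGOTIATE) == 0
assert (proxyauth_bits(pack(7, 28, 0)) & HTTPAUTH_GSSNEGOTIATE) != 0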
@@ -1003,7 +1044,7 @@ index e090e90..6ce9861 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1453,36 @@ class PyCurlFileObject():
+@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -1029,20 +1070,17 @@ index e090e90..6ce9861 100644
elif errcode == 28:
- err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
+ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+ err.url = errurl
raise err
elif errcode == 35:
msg = _("problem making ssl connection")
err = URLGrabError(14, msg)
- raise err
- elif errcode == 35:
- msg = _("problem making ssl connection")
- err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 37:
- raise err
- elif errcode == 37:
- msg = _("Could not open/read %s") % (self.url)
+ msg = _("Could not open/read %s") % (errurl)
err = URLGrabError(14, msg)
- err = URLGrabError(14, msg)
- err.url = self.url
+ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+ err.url = errurl
raise err
@@ -1052,48 +1090,16 @@ index e090e90..6ce9861 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
-@@ -1272,33 +1494,94 @@ class PyCurlFileObject():
elif errcode == 58:
msg = _("problem with the local client certificate")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 60:
- msg = _("client cert cannot be verified or client cert incorrect")
+ msg = _("Peer cert cannot be verified or peer cert invalid")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 63:
if self._error[1]:
msg = self._error[1]
else:
- msg = _("Max download size exceeded on %s") % (self.url)
+ msg = _("Max download size exceeded on %s") % ()
err = URLGrabError(14, msg)
+@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
# figure out what aborted the pycurl process FIXME
raise KeyboardInterrupt
- elif errcode == 58:
- msg = _("problem with the local client certificate")
- err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+ elif str(e.args[1]) == '' and code and not 200 <= code <= 299:
+ if self.scheme in ['http', 'https']:
+ if self.http_code in responses:
+ resp = responses[self.http_code]
+ msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
+ else:
+ msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
+ elif self.scheme in ['ftp']:
+ msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
+ else:
+ msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
- raise err
+ else:
+ pyerr2str = { 5 : _("Couldn't resolve proxy"),
+ 6 : _("Couldn't resolve host"),
+ 7 : _("Couldn't connect"),
@@ -1137,25 +1143,57 @@ index e090e90..6ce9861 100644
+ 70 : _("Out of disk space on server"),
+ 73 : _("Remote file exists"),
+ }
+ errstr = str(e.args[1])
+ if not errstr:
+ errstr = pyerr2str.get(errcode, '<Unknown>')
+ msg = 'curl#%s - "%s"' % (errcode, errstr)
code = errcode
err = URLGrabError(14, msg)
err.code = code
err.exception = e
raise err
+ errstr = str(e.args[1]) or pyerr2str.get(errcode, '<Unknown>')
+ if code and not 200 <= code <= 299:
+ msg = '%s Error %d - %s' % (self.scheme.upper(), code,
+ self.scheme in ('http', 'https')
+ and responses.get(code) or errstr)
+ else:
+ msg = 'curl#%s - "%s"' % (errcode, errstr)
+ code = errcode
- elif errcode == 60:
- msg = _("client cert cannot be verified or client cert incorrect")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
+ err.code = code
raise err
-
- elif errcode == 63:
- if self._error[1]:
- msg = self._error[1]
- else:
- msg = _("Max download size exceeded on %s") % (self.url)
+
+ else:
+ if self._error[1]:
+ msg = self._error[1]
+ err = URLGrabError(14, msg)
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = urllib.unquote(self.url)
+ raise err
raise err
-
- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
- else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
- code = errcode
- err = URLGrabError(14, msg)
- err.code = code
- err.exception = e
- raise err
def _do_open(self):
self.curl_obj = _curl_cache
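The rewritten error path above ("make error messages more verbose") picks between an HTTP/FTP-style message and a curl#NN one, falling back to the pyerr2str table when curl supplies no text. A condensed, runnable sketch of that selection logic, with tiny stand-in tables:

responses = {404: 'Not Found'}              # stand-in for httplib.responses
pyerr2str = {6: "Couldn't resolve host"}    # subset of the table above

def format_error(scheme, code, errcode, errtext):
    errstr = errtext or pyerr2str.get(errcode, '<Unknown>')
    if code and not 200 <= code <= 299:
        return '%s Error %d - %s' % (scheme.upper(), code,
                                     scheme in ('http', 'https')
                                     and responses.get(code) or errstr)
    return 'curl#%s - "%s"' % (errcode, errstr)

print(format_error('http', 404, 22, ''))  # HTTP Error 404 - Not Found
print(format_error('http', 0, 6, ''))     # curl#6 - "Couldn't resolve host"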
-@@ -1333,7 +1616,11 @@ class PyCurlFileObject():
- self.curl_obj.reset() # reset all old settings away, just in case
+ # reset() clears PYCURL_ERRORBUFFER, and there's no way
+ # to reinitialize it, so better don't do that. BZ 896025
+ #self.curl_obj.reset() # reset all old settings away, just in case
# setup any ranges
self._set_opts()
self._do_grab()
+@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
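Commenting out curl_obj.reset() (BZ 896025) means the cached handle keeps whatever options the previous request set, so correctness now depends on _set_opts() explicitly overwriting every option a request cares about. A minimal sketch of that reuse pattern, with a hypothetical Handle in place of pycurl.Curl:

class Handle(object):
    # hypothetical stand-in for the cached pycurl.Curl object
    def __init__(self):
        self.opts = {}
    def setopt(self, key, value):
        self.opts[key] = value

_curl_cache = Handle()

def do_open(url, timeout=300):
    h = _curl_cache
    # no h.reset() here (it would clear the error buffer, BZ 896025);
    # instead overwrite every option this request depends on
    h.setopt('URL', url)
    h.setopt('TIMEOUT', timeout)
    return h

do_open('http://example.com/a')
assert do_open('http://example.com/b').opts['URL'] == 'http://example.com/b'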
@@ -1168,7 +1206,7 @@ index e090e90..6ce9861 100644
if rt:
header = range_tuple_to_header(rt)
-@@ -1434,21 +1721,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@@ -1222,7 +1260,7 @@ index e090e90..6ce9861 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1526,17 +1838,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1248,7 +1286,7 @@ index e090e90..6ce9861 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1544,13 +1859,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
return True
return False
@@ -1262,7 +1300,7 @@ index e090e90..6ce9861 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
-@@ -1582,9 +1890,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1285,7 +1323,7 @@ index e090e90..6ce9861 100644
#####################################################################
# DEPRECATED FUNCTIONS
-@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1727,6 +1765,8 @@ index e090e90..6ce9861 100644
+ if ug_err is None:
+ # defer first update if the file was small. BZ 851178.
+ if not ts and dl_size < 1e6: return
+ # clamp timestamps from the future. BZ 894630.
+ if ts > now: ts = now
+
+ # k1: the older, the less useful
+ # k2: <500ms readings are less reliable
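The _TH.update() excerpt above combines three heuristics: skip the very first sample when the file was under 1 MB (BZ 851178), clamp timestamps from the future (BZ 894630), then blend the new speed reading into the stored per-host estimate with weights k1 (older estimates decay) and k2 (sub-500 ms transfers are noisy). A rough sketch of that blend; the half-life constant and smoothing formula are assumptions, since the hunk shows only the k1/k2 comments:

import time

HALF_LIFE = 30 * 24 * 3600.0  # assumed decay constant, not from the patch

def update_speed(speed, ts, dl_size, dl_time, now=None):
    # returns the updated (speed_estimate, timestamp) for one host
    if now is None:
        now = time.time()
    if not ts and dl_size < 1e6:
        return speed, ts              # BZ 851178: defer first small sample
    if ts > now:
        ts = now                      # BZ 894630: clamp future timestamps
    k1 = 2.0 ** ((ts - now) / HALF_LIFE)  # the older, the less useful
    k2 = min(dl_time / 0.5, 1.0)          # <500ms readings less reliable
    k = k1 * k2
    new = dl_size / max(dl_time, 1e-3)
    return (k * speed + new) / (k + 1), now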
