- Update to latest HEAD.

- fix some test cases that were failing.  BZ 918658
- exit(1) on /bin/urlgrabber failures.  BZ 918613
- clamp timestamps from the future.  BZ 894630 (sketched below)
- enable GSSNEGOTIATE when curl implements it correctly (>= 7.28.0).
- make error messages more verbose.
Branch: epel9
Author: Zdenek Pavlas
parent cb2d39c735
commit c508ad399b
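The timestamp clamp (BZ 894630) is small enough to show standalone. A minimal sketch of the idea, using a hypothetical clamp_timestamp() helper rather than the real _TH.update() in grabber.py:

import time

def clamp_timestamp(ts, now=None):
    # A timestamp recorded "in the future" (e.g. after the system
    # clock stepped backwards) would distort download-speed estimates,
    # so never let ts get ahead of the current time.
    if now is None:
        now = time.time()
    if ts > now:
        ts = now
    return ts

print(clamp_timestamp(time.time() + 3600))  # clamped to "now"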

@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 24%{?dist}
+Release: 25%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,14 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
* Thu Mar 7 2013 Zdeněk Pavlas <zpavlas@redhat.com> - 3.9.1-25
- Update to latest HEAD.
- fix some test cases that were failing. BZ 918658
- exit(1) on /bin/urlgrabber failures. BZ 918613
- clamp timestamps from the future. BZ 894630
- enable GSSNEGOTIATE when curl implements it correctly (>= 7.28.0).
- make error messages more verbose.
* Thu Feb 14 2013 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 3.9.1-24
- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild

@@ -12,7 +12,7 @@ index 0000000..1ffe416
+*.kateproject
+ipython.log*
diff --git a/scripts/urlgrabber b/scripts/urlgrabber
-index 518e512..09cd896 100644
+index 518e512..07881b3 100644
--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
@@ -115,6 +115,7 @@ options:
@@ -71,6 +71,14 @@ index 518e512..09cd896 100644
def help_doc(self):
print __doc__
@@ -294,6 +301,7 @@ class ugclient:
if self.op.localfile: print f
except URLGrabError, e:
print e
+ sys.exit(1)
def set_debug_logger(self, dbspec):
try:
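The sys.exit(1) added above is the whole of the BZ 918613 fix: the CLI used to print the URLGrabError and still exit 0, so callers could not detect failure. A minimal sketch of the pattern, with a hypothetical fetch() standing in for URLGrabber.urlgrab():

import sys

class URLGrabError(Exception):
    pass

def fetch(url):
    # stand-in for the real grab; always fails for the demo
    raise URLGrabError(14, 'HTTP Error 404 : %s' % url)

try:
    fetch('http://example.com/missing')
except URLGrabError as e:
    print(e)
    sys.exit(1)  # non-zero status so scripts can see the failure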
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
index 0000000..3dafb12
@@ -181,6 +189,37 @@ index 50c6348..5fb43f9 100644
base_ftp = 'ftp://localhost/test/'
# set to a proftp server only. we're working around a couple of
diff --git a/test/test_mirror.py b/test/test_mirror.py
index 70fe069..cb63a41 100644
--- a/test/test_mirror.py
+++ b/test/test_mirror.py
@@ -28,7 +28,7 @@ import os
import string, tempfile, random, cStringIO, os
import urlgrabber.grabber
-from urlgrabber.grabber import URLGrabber, URLGrabError
+from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
import urlgrabber.mirror
from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
@@ -106,6 +106,9 @@ class CallbackTests(TestCase):
self.g = URLGrabber()
fullmirrors = [base_mirror_url + m + '/' for m in \
(bad_mirrors + good_mirrors)]
+ if hasattr(urlgrabber.grabber, '_TH'):
+ # test assumes mirrors are not re-ordered
+ urlgrabber.grabber._TH.hosts.clear()
self.mg = MirrorGroup(self.g, fullmirrors)
def test_failure_callback(self):
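The _TH.hosts.clear() call above exists because the new grabber code keeps a module-level cache of per-host timing data, and MirrorGroup can use it to re-order mirrors; stale entries from an earlier test would break the assumption that mirrors are tried in list order. A sketch of that isolation pattern, with a hypothetical _TH stand-in:

import unittest

class _TH:
    # hypothetical stand-in for urlgrabber.grabber._TH: a module-level
    # cache mapping host -> timing/speed records
    hosts = {}

class MirrorOrderTests(unittest.TestCase):
    def setUp(self):
        # the tests assume mirrors are not re-ordered, so drop any
        # recorded host speeds before each test
        _TH.hosts.clear()

    def test_cache_starts_empty(self):
        self.assertEqual(_TH.hosts, {})

if __name__ == '__main__':
    unittest.main()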
@@ -168,6 +171,7 @@ class FakeGrabber:
self.resultlist = resultlist or []
self.index = 0
self.calls = []
+ self.opts = URLGrabberOptions()
def urlgrab(self, url, filename=None, **kwargs):
self.calls.append( (url, filename) )
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
@@ -236,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..6ce9861 100644
+index e090e90..1afb2c5 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -958,7 +997,7 @@ index e090e90..6ce9861 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
-@@ -1187,28 +1414,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -982,9 +1021,11 @@ index e090e90..6ce9861 100644
+ # proxy
+ if opts.proxy is not None:
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
+ self.curl_obj.setopt(pycurl.PROXYAUTH,
+ # All but Kerberos. BZ 769254
+ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
+ auth = pycurl.HTTPAUTH_ANY
+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
+ # BZ 769254: work around a bug in curl < 7.28.0
+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
+ self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
+
+ if opts.username and opts.password:
+ if self.scheme in ('http', 'https'):
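pycurl.version_info()[2] is the libcurl version packed as 0xXXYYZZ, so (7 << 16 | 28 << 8 | 0) is 7.28.0, the first libcurl where proxy GSSNEGOTIATE works per the BZ 769254 workaround above. A standalone sketch of the comparison; the auth-bit values here are illustrative, not pycurl's real constants:

def pack(major, minor, patch):
    # libcurl packs its version number as 0xXXYYZZ
    return major << 16 | minor << 8 | patch

HTTPAUTH_ANY = ~0             # illustrative values only
HTTPAUTH_GSSNEGOTIATE = 1 << 2

def proxyauth_bits(curl_version):
    auth = HTTPAUTH_ANY
    if curl_version < pack(7, 28, 0):
        # BZ 769254: GSSNEGOTIATE via proxy is broken before 7.28.0
        auth &= ~HTTPAUTH_GSSNEGOTIATE
    return auth

assert (proxyauth_bits(pack(7, 27, 0)) & HTTPAUTH_GSSNEGOTIATE) == 0
assert (proxyauth_bits(pack(7, 28, 0)) & HTTPAUTH_GSSNEGOTIATE) != 0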
@@ -1003,7 +1044,7 @@ index e090e90..6ce9861 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1453,36 @@ class PyCurlFileObject():
+@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -1029,20 +1070,17 @@ index e090e90..6ce9861 100644
elif errcode == 28:
- err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
+ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+ err.url = errurl
raise err
elif errcode == 35:
msg = _("problem making ssl connection")
err = URLGrabError(14, msg)
- raise err
- elif errcode == 35:
- msg = _("problem making ssl connection")
- err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 37:
- raise err
- elif errcode == 37:
- msg = _("Could not open/read %s") % (self.url)
+ msg = _("Could not open/read %s") % (errurl)
err = URLGrabError(14, msg)
- err = URLGrabError(14, msg)
- err.url = self.url
+ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+ err.url = errurl
raise err
@@ -1052,48 +1090,16 @@ index e090e90..6ce9861 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
-@@ -1272,33 +1494,94 @@ class PyCurlFileObject():
elif errcode == 58:
msg = _("problem with the local client certificate")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 60:
- msg = _("client cert cannot be verified or client cert incorrect")
+ msg = _("Peer cert cannot be verified or peer cert invalid")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
elif errcode == 63:
if self._error[1]:
msg = self._error[1]
else:
- msg = _("Max download size exceeded on %s") % (self.url)
+ msg = _("Max download size exceeded on %s") % ()
err = URLGrabError(14, msg)
+@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
# figure out what aborted the pycurl process FIXME
raise KeyboardInterrupt
- elif errcode == 58:
- msg = _("problem with the local client certificate")
- err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
raise err
- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+ elif str(e.args[1]) == '' and code and not 200 <= code <= 299:
+ if self.scheme in ['http', 'https']:
+ if self.http_code in responses:
+ resp = responses[self.http_code]
+ msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
+ else:
+ msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
+ elif self.scheme in ['ftp']:
+ msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
+ else:
+ msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
- raise err
+ else:
+ pyerr2str = { 5 : _("Couldn't resolve proxy"),
+ 6 : _("Couldn't resolve host"),
+ 7 : _("Couldn't connect"),
@@ -1137,25 +1143,57 @@ index e090e90..6ce9861 100644
+ 70 : _("Out of disk space on server"),
+ 73 : _("Remote file exists"),
+ }
+ errstr = str(e.args[1])
+ if not errstr:
+ errstr = pyerr2str.get(errcode, '<Unknown>')
+ msg = 'curl#%s - "%s"' % (errcode, errstr)
code = errcode
err = URLGrabError(14, msg)
err.code = code
err.exception = e
raise err
+ errstr = str(e.args[1]) or pyerr2str.get(errcode, '<Unknown>')
+ if code and not 200 <= code <= 299:
+ msg = '%s Error %d - %s' % (self.scheme.upper(), code,
+ self.scheme in ('http', 'https')
+ and responses.get(code) or errstr)
+ else:
+ msg = 'curl#%s - "%s"' % (errcode, errstr)
+ code = errcode
- elif errcode == 60:
- msg = _("client cert cannot be verified or client cert incorrect")
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = errurl
+ err.code = code
raise err
-
- elif errcode == 63:
- if self._error[1]:
- msg = self._error[1]
- else:
- msg = _("Max download size exceeded on %s") % (self.url)
+
+ else:
+ if self._error[1]:
+ msg = self._error[1]
+ err = URLGrabError(14, msg)
err = URLGrabError(14, msg)
- err.url = self.url
+ err.url = urllib.unquote(self.url)
+ raise err
raise err
-
- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
- else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
- code = errcode
- err = URLGrabError(14, msg)
- err.code = code
- err.exception = e
- raise err
def _do_open(self):
self.curl_obj = _curl_cache
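The rewritten error path above ("make error messages more verbose") picks between an HTTP/FTP-style message and a curl#NN one, falling back to the pyerr2str table when curl supplies no text. A condensed, runnable sketch of that selection logic, with tiny stand-in tables:

responses = {404: 'Not Found'}              # stand-in for httplib.responses
pyerr2str = {6: "Couldn't resolve host"}    # subset of the table above

def format_error(scheme, code, errcode, errtext):
    errstr = errtext or pyerr2str.get(errcode, '<Unknown>')
    if code and not 200 <= code <= 299:
        return '%s Error %d - %s' % (scheme.upper(), code,
                                     scheme in ('http', 'https')
                                     and responses.get(code) or errstr)
    return 'curl#%s - "%s"' % (errcode, errstr)

print(format_error('http', 404, 22, ''))  # HTTP Error 404 - Not Found
print(format_error('http', 0, 6, ''))     # curl#6 - "Couldn't resolve host"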
-@@ -1333,7 +1616,11 @@ class PyCurlFileObject():
- self.curl_obj.reset() # reset all old settings away, just in case
+ # reset() clears PYCURL_ERRORBUFFER, and there's no way
+ # to reinitialize it, so better don't do that. BZ 896025
+ #self.curl_obj.reset() # reset all old settings away, just in case
# setup any ranges
self._set_opts()
self._do_grab()
+@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
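Commenting out curl_obj.reset() (BZ 896025) means the cached handle keeps whatever options the previous request set, so correctness now depends on _set_opts() explicitly overwriting every option a request cares about. A minimal sketch of that reuse pattern, with a hypothetical Handle in place of pycurl.Curl:

class Handle(object):
    # hypothetical stand-in for the cached pycurl.Curl object
    def __init__(self):
        self.opts = {}
    def setopt(self, key, value):
        self.opts[key] = value

_curl_cache = Handle()

def do_open(url, timeout=300):
    h = _curl_cache
    # no h.reset() here (it would clear the error buffer, BZ 896025);
    # instead overwrite every option this request depends on
    h.setopt('URL', url)
    h.setopt('TIMEOUT', timeout)
    return h

do_open('http://example.com/a')
assert do_open('http://example.com/b').opts['URL'] == 'http://example.com/b'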
@@ -1168,7 +1206,7 @@ index e090e90..6ce9861 100644
if rt:
header = range_tuple_to_header(rt)
-@@ -1434,21 +1721,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@@ -1222,7 +1260,7 @@ index e090e90..6ce9861 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1526,17 +1838,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1248,7 +1286,7 @@ index e090e90..6ce9861 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1544,13 +1859,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
return True
return False
@@ -1262,7 +1300,7 @@ index e090e90..6ce9861 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
-@@ -1582,9 +1890,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1285,7 +1323,7 @@ index e090e90..6ce9861 100644
#####################################################################
# DEPRECATED FUNCTIONS
-@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1727,6 +1765,8 @@ index e090e90..6ce9861 100644
+ if ug_err is None:
+ # defer first update if the file was small. BZ 851178.
+ if not ts and dl_size < 1e6: return
+ # clamp timestamps from the future. BZ 894630.
+ if ts > now: ts = now
+
+ # k1: the older, the less useful
+ # k2: <500ms readings are less reliable
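The _TH.update() excerpt above combines three heuristics: skip the very first sample when the file was under 1 MB (BZ 851178), clamp timestamps from the future (BZ 894630), then blend the new speed reading into the stored per-host estimate with weights k1 (older estimates decay) and k2 (sub-500 ms transfers are noisy). A rough sketch of that blend; the half-life constant and smoothing formula are assumptions, since the hunk shows only the k1/k2 comments:

import time

HALF_LIFE = 30 * 24 * 3600.0  # assumed decay constant, not from the patch

def update_speed(speed, ts, dl_size, dl_time, now=None):
    # returns the updated (speed_estimate, timestamp) for one host
    if now is None:
        now = time.time()
    if not ts and dl_size < 1e6:
        return speed, ts              # BZ 851178: defer first small sample
    if ts > now:
        ts = now                      # BZ 894630: clamp future timestamps
    k1 = 2.0 ** ((ts - now) / HALF_LIFE)  # the older, the less useful
    k2 = min(dl_time / 0.5, 1.0)          # <500ms readings less reliable
    k = k1 * k2
    new = dl_size / max(dl_time, 1e-3)
    return (k * speed + new) / (k + 1), now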
