From bc8c777de15f6cfcf4a9c60808fb64db794d3c67 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Fri, 17 May 2013 10:12:39 +0200 Subject: [PATCH] Update to latest HEAD. --- python-urlgrabber.spec | 7 +++- urlgrabber-HEAD.patch | 79 +++++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index fa7dc7d..0531d94 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 26%{?dist} +Release: 27%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri May 17 2013 Zdenek Pavlas - 3.9.1-27 +- Update to latest HEAD. +- add URLGrabError.code to the external downloader API +- Disable GSSNEGOTIATE to work around a curl bug. BZ 960163 + * Wed Mar 27 2013 Zdenek Pavlas - 3.9.1-26 - Update to latest HEAD. - Handle HTTP 200 response to range requests correctly. BZ 919076 diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 8947982..57f41ff 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -81,7 +81,7 @@ index 518e512..07881b3 100644 try: diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down new file mode 100755 -index 0000000..3dafb12 +index 0000000..9ea0e70 --- /dev/null +++ b/scripts/urlgrabber-ext-down @@ -0,0 +1,75 @@ @@ -155,7 +155,7 @@ index 0000000..3dafb12 + ug_err = 'OK' + except URLGrabError, e: + size = 0 -+ ug_err = '%d %s' % e.args ++ ug_err = '%d %d %s' % (e.errno, getattr(e, 'code', 0), e.strerror) + write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err) + +if __name__ == '__main__': @@ -190,7 +190,7 @@ index 50c6348..5fb43f9 100644 # set to a proftp server only. we're working around a couple of diff --git a/test/test_mirror.py b/test/test_mirror.py -index 70fe069..cb63a41 100644 +index 70fe069..6fdb668 100644 --- a/test/test_mirror.py +++ b/test/test_mirror.py @@ -28,7 +28,7 @@ import os @@ -220,6 +220,45 @@ index 70fe069..cb63a41 100644 def urlgrab(self, url, filename=None, **kwargs): self.calls.append( (url, filename) ) +@@ -265,6 +269,38 @@ class ActionTests(TestCase): + self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) + + ++class HttpReplyCode(TestCase): ++ def setUp(self): ++ def server(): ++ import socket ++ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) ++ s.bind(('localhost', 2000)); s.listen(1) ++ while 1: ++ c, a = s.accept() ++ while not c.recv(4096).endswith('\r\n\r\n'): pass ++ c.sendall('HTTP/1.1 %d %s\r\n' % self.reply) ++ c.close() ++ import thread ++ self.reply = 503, "Busy" ++ thread.start_new_thread(server, ()) ++ ++ def failure(obj): ++ self.code = getattr(obj.exception, 'code', None) ++ return {} ++ self.g = URLGrabber() ++ self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure) ++ ++ def test_grab(self): ++ self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo') ++ self.assertEquals(self.code, 503); del self.code ++ ++ err = [] ++ self.mg.urlgrab('foo', async = True, failfunc = err.append) ++ urlgrabber.grabber.parallel_wait() ++ self.assertEquals([e.exception.errno for e in err], [256]) ++ self.assertEquals(self.code, 503); del self.code ++ + def suite(): + tl = TestLoader() + return tl.loadTestsFromModule(sys.modules[__name__]) diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py index 3e5f3b7..8eeaeda 100644 --- a/urlgrabber/byterange.py @@ -275,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..9526dc1 100644 +index e090e90..37d1297 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -1018,7 +1057,7 @@ index e090e90..9526dc1 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1423,28 @@ class PyCurlFileObject(): +@@ -1187,28 +1423,26 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1042,11 +1081,9 @@ index e090e90..9526dc1 100644 + # proxy + if opts.proxy is not None: + self.curl_obj.setopt(pycurl.PROXY, opts.proxy) -+ auth = pycurl.HTTPAUTH_ANY -+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0): -+ # BZ 769254: work around a bug in curl < 7.28.0 -+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE -+ self.curl_obj.setopt(pycurl.PROXYAUTH, auth) ++ self.curl_obj.setopt(pycurl.PROXYAUTH, ++ # All but Kerberos. BZ 769254 ++ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE) + + if opts.username and opts.password: + if self.scheme in ('http', 'https'): @@ -1065,7 +1102,7 @@ index e090e90..9526dc1 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1464,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1462,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1111,7 +1148,7 @@ index e090e90..9526dc1 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,36 +1492,70 @@ class PyCurlFileObject(): +@@ -1269,36 +1490,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1207,7 +1244,7 @@ index e090e90..9526dc1 100644 def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1590,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1588,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1220,7 +1257,7 @@ index e090e90..9526dc1 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1695,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1693,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1274,7 +1311,7 @@ index e090e90..9526dc1 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1812,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1810,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1300,7 +1337,7 @@ index e090e90..9526dc1 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1833,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1831,6 @@ class PyCurlFileObject(): return True return False @@ -1314,7 +1351,7 @@ index e090e90..9526dc1 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1864,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1862,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1337,7 +1374,7 @@ index e090e90..9526dc1 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### @@ -1478,7 +1515,7 @@ index e090e90..9526dc1 100644 + raise KeyboardInterrupt + for line in lines: + # parse downloader output -+ line = line.split(' ', 5) ++ line = line.split(' ', 6) + _id, size = map(int, line[:2]) + if len(line) == 2: + self.running[_id]._progress.update(size) @@ -1489,7 +1526,9 @@ index e090e90..9526dc1 100644 + ug_err = None + if DEBUG: DEBUG.info('success') + else: -+ ug_err = URLGrabError(int(line[4]), line[5]) ++ ug_err = URLGrabError(int(line[4]), line[6]) ++ if line[5] != '0': ++ ug_err.code = int(line[5]) + if DEBUG: DEBUG.info('failure: %s', ug_err) + _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0]) + ret.append((opts, size, ug_err))