From de85533e85cd402e6e133bb87837b49d62a87773 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Wed, 27 Mar 2013 10:55:23 +0100 Subject: [PATCH 1/3] Update to latest HEAD --- python-urlgrabber.spec | 7 ++++- urlgrabber-HEAD.patch | 64 +++++++++++++++++++++++++----------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 43666d9..fa7dc7d 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 25%{?dist} +Release: 26%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Wed Mar 27 2013 Zdenek Pavlas - 3.9.1-26 +- Update to latest HEAD. +- Handle HTTP 200 response to range requests correctly. BZ 919076 +- Reset curl_obj to clear CURLOPT_RANGE from previous requests. BZ 923951 + * Thu Mar 7 2013 Zdeněk Pavlas - 3.9.1-25 - Update to latest HEAD. - fix some test cases that were failing. BZ 918658 diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 4633455..8947982 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -275,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..1afb2c5 100644 +index e090e90..9526dc1 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -874,7 +874,7 @@ index e090e90..1afb2c5 100644 if not self._prog_running: if self.opts.progress_obj: size = self.size + self._reget_length -@@ -1079,15 +1267,24 @@ class PyCurlFileObject(): +@@ -1079,23 +1267,40 @@ class PyCurlFileObject(): self.opts.progress_obj.update(self._amount_read) self._amount_read += len(buf) @@ -899,9 +899,29 @@ index e090e90..1afb2c5 100644 - return -1 + return -1 try: - self._hdr_dump += buf +- self._hdr_dump += buf # we have to get the size before we do the progress obj start -@@ -1104,7 +1301,17 @@ class PyCurlFileObject(): + # but we can't do that w/o making it do 2 connects, which sucks + # so we cheat and stuff it in here in the hdr_retrieve +- if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1: +- length = buf.split(':')[1] +- self.size = int(length) ++ if self.scheme in ['http','https']: ++ if buf.lower().find('content-length') != -1: ++ length = buf.split(':')[1] ++ self.size = int(length) ++ elif self.append and self._hdr_dump == '' and ' 200 ' in buf: ++ # reget was attempted but server sends it all ++ # undo what we did in _build_range() ++ self.append = False ++ self.reget_time = None ++ self._amount_read = 0 ++ self._reget_length = 0 ++ self.fo.truncate(0) + elif self.scheme in ['ftp']: + s = None + if buf.startswith('213 '): +@@ -1104,7 +1309,18 @@ class PyCurlFileObject(): s = parse150(buf) if s: self.size = int(s) @@ -913,6 +933,7 @@ index e090e90..1afb2c5 100644 + self.scheme = urlparse.urlsplit(location)[0] + self.url = location + ++ self._hdr_dump += buf + if len(self._hdr_dump) != 0 and buf == '\r\n': + self._hdr_ended = True + if DEBUG: DEBUG.debug('header ended:') @@ -920,7 +941,7 @@ index e090e90..1afb2c5 100644 return len(buf) except KeyboardInterrupt: return pycurl.READFUNC_ABORT -@@ -1113,8 +1320,10 @@ class PyCurlFileObject(): +@@ -1113,8 +1329,10 @@ class PyCurlFileObject(): if self._parsed_hdr: return self._parsed_hdr statusend = self._hdr_dump.find('\n') @@ 
-931,7 +952,7 @@ index e090e90..1afb2c5 100644 self._parsed_hdr = mimetools.Message(hdrfp) return self._parsed_hdr -@@ -1127,6 +1336,9 @@ class PyCurlFileObject(): +@@ -1127,6 +1345,9 @@ class PyCurlFileObject(): if not opts: opts = self.opts @@ -941,7 +962,7 @@ index e090e90..1afb2c5 100644 # defaults we're always going to set self.curl_obj.setopt(pycurl.NOPROGRESS, False) -@@ -1136,11 +1348,21 @@ class PyCurlFileObject(): +@@ -1136,11 +1357,21 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) self.curl_obj.setopt(pycurl.FAILONERROR, True) self.curl_obj.setopt(pycurl.OPT_FILETIME, True) @@ -964,7 +985,7 @@ index e090e90..1afb2c5 100644 # maybe to be options later self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) -@@ -1148,9 +1370,11 @@ class PyCurlFileObject(): +@@ -1148,9 +1379,11 @@ class PyCurlFileObject(): # timeouts timeout = 300 @@ -979,7 +1000,7 @@ index e090e90..1afb2c5 100644 # ssl options if self.scheme == 'https': -@@ -1158,13 +1382,16 @@ class PyCurlFileObject(): +@@ -1158,13 +1391,16 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) @@ -997,7 +1018,7 @@ index e090e90..1afb2c5 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1414,28 @@ class PyCurlFileObject(): +@@ -1187,28 +1423,28 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1044,7 +1065,7 @@ index e090e90..1afb2c5 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1455,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1464,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1090,7 +1111,7 @@ index e090e90..1afb2c5 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,40 +1483,76 @@ class PyCurlFileObject(): +@@ -1269,36 +1492,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1186,14 +1207,7 @@ index e090e90..1afb2c5 100644 def _do_open(self): self.curl_obj = _curl_cache -- self.curl_obj.reset() # reset all old settings away, just in case -+ # reset() clears PYCURL_ERRORBUFFER, and there's no way -+ # to reinitialize it, so better don't do that. 
BZ 896025 -+ #self.curl_obj.reset() # reset all old settings away, just in case - # setup any ranges - self._set_opts() - self._do_grab() -@@ -1333,7 +1583,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1590,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1206,7 +1220,7 @@ index e090e90..1afb2c5 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1688,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1695,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1260,7 +1274,7 @@ index e090e90..1afb2c5 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1805,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1812,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1286,7 +1300,7 @@ index e090e90..1afb2c5 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1826,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1833,6 @@ class PyCurlFileObject(): return True return False @@ -1300,7 +1314,7 @@ index e090e90..1afb2c5 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1857,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1864,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1323,7 +1337,7 @@ index e090e90..1afb2c5 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### From bc8c777de15f6cfcf4a9c60808fb64db794d3c67 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Fri, 17 May 2013 10:12:39 +0200 Subject: [PATCH 2/3] Update to latest HEAD. --- python-urlgrabber.spec | 7 +++- urlgrabber-HEAD.patch | 79 +++++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index fa7dc7d..0531d94 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 26%{?dist} +Release: 27%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri May 17 2013 Zdenek Pavlas - 3.9.1-27 +- Update to latest HEAD. +- add URLGrabError.code to the external downloader API +- Disable GSSNEGOTIATE to work around a curl bug. BZ 960163 + * Wed Mar 27 2013 Zdenek Pavlas - 3.9.1-26 - Update to latest HEAD. - Handle HTTP 200 response to range requests correctly. 
BZ 919076 diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 8947982..57f41ff 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -81,7 +81,7 @@ index 518e512..07881b3 100644 try: diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down new file mode 100755 -index 0000000..3dafb12 +index 0000000..9ea0e70 --- /dev/null +++ b/scripts/urlgrabber-ext-down @@ -0,0 +1,75 @@ @@ -155,7 +155,7 @@ index 0000000..3dafb12 + ug_err = 'OK' + except URLGrabError, e: + size = 0 -+ ug_err = '%d %s' % e.args ++ ug_err = '%d %d %s' % (e.errno, getattr(e, 'code', 0), e.strerror) + write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err) + +if __name__ == '__main__': @@ -190,7 +190,7 @@ index 50c6348..5fb43f9 100644 # set to a proftp server only. we're working around a couple of diff --git a/test/test_mirror.py b/test/test_mirror.py -index 70fe069..cb63a41 100644 +index 70fe069..6fdb668 100644 --- a/test/test_mirror.py +++ b/test/test_mirror.py @@ -28,7 +28,7 @@ import os @@ -220,6 +220,45 @@ index 70fe069..cb63a41 100644 def urlgrab(self, url, filename=None, **kwargs): self.calls.append( (url, filename) ) +@@ -265,6 +269,38 @@ class ActionTests(TestCase): + self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) + + ++class HttpReplyCode(TestCase): ++ def setUp(self): ++ def server(): ++ import socket ++ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) ++ s.bind(('localhost', 2000)); s.listen(1) ++ while 1: ++ c, a = s.accept() ++ while not c.recv(4096).endswith('\r\n\r\n'): pass ++ c.sendall('HTTP/1.1 %d %s\r\n' % self.reply) ++ c.close() ++ import thread ++ self.reply = 503, "Busy" ++ thread.start_new_thread(server, ()) ++ ++ def failure(obj): ++ self.code = getattr(obj.exception, 'code', None) ++ return {} ++ self.g = URLGrabber() ++ self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure) ++ ++ def test_grab(self): ++ self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo') ++ self.assertEquals(self.code, 503); del self.code ++ ++ err = [] ++ self.mg.urlgrab('foo', async = True, failfunc = err.append) ++ urlgrabber.grabber.parallel_wait() ++ self.assertEquals([e.exception.errno for e in err], [256]) ++ self.assertEquals(self.code, 503); del self.code ++ + def suite(): + tl = TestLoader() + return tl.loadTestsFromModule(sys.modules[__name__]) diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py index 3e5f3b7..8eeaeda 100644 --- a/urlgrabber/byterange.py @@ -275,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..9526dc1 100644 +index e090e90..37d1297 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -1018,7 +1057,7 @@ index e090e90..9526dc1 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1423,28 @@ class PyCurlFileObject(): +@@ -1187,28 +1423,26 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1042,11 +1081,9 @@ index e090e90..9526dc1 100644 + # proxy + if opts.proxy is not None: + self.curl_obj.setopt(pycurl.PROXY, opts.proxy) -+ auth = pycurl.HTTPAUTH_ANY -+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0): -+ # BZ 769254: work around a bug in curl < 7.28.0 -+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE -+ 
self.curl_obj.setopt(pycurl.PROXYAUTH, auth) ++ self.curl_obj.setopt(pycurl.PROXYAUTH, ++ # All but Kerberos. BZ 769254 ++ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE) + + if opts.username and opts.password: + if self.scheme in ('http', 'https'): @@ -1065,7 +1102,7 @@ index e090e90..9526dc1 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1464,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1462,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1111,7 +1148,7 @@ index e090e90..9526dc1 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,36 +1492,70 @@ class PyCurlFileObject(): +@@ -1269,36 +1490,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1207,7 +1244,7 @@ index e090e90..9526dc1 100644 def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1590,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1588,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1220,7 +1257,7 @@ index e090e90..9526dc1 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1695,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1693,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1274,7 +1311,7 @@ index e090e90..9526dc1 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1812,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1810,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1300,7 +1337,7 @@ index e090e90..9526dc1 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1833,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1831,6 @@ class PyCurlFileObject(): return True return False @@ -1314,7 +1351,7 @@ index e090e90..9526dc1 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1864,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1862,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1337,7 +1374,7 @@ index e090e90..9526dc1 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### @@ -1478,7 +1515,7 @@ index e090e90..9526dc1 100644 + raise KeyboardInterrupt + for line in lines: + # parse downloader output -+ line = line.split(' ', 5) ++ line = line.split(' ', 6) + _id, size = map(int, line[:2]) + if len(line) == 2: + self.running[_id]._progress.update(size) @@ -1489,7 +1526,9 @@ index e090e90..9526dc1 100644 + ug_err = None + if DEBUG: DEBUG.info('success') + else: -+ ug_err = URLGrabError(int(line[4]), line[5]) ++ ug_err = URLGrabError(int(line[4]), line[6]) ++ if line[5] != '0': ++ ug_err.code = int(line[5]) + if DEBUG: DEBUG.info('failure: %s', ug_err) + _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0]) + ret.append((opts, size, ug_err)) From b619025a751cf502f8dafb0fd03e63ec305039c6 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Mon, 20 May 2013 16:37:23 +0200 Subject: [PATCH 3/3] Update to latest HEAD --- 
python-urlgrabber.spec | 7 ++- urlgrabber-HEAD.patch | 109 ++++++++++++++++++++++------------------- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 0531d94..703b0d0 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 27%{?dist} +Release: 28%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri May 17 2013 Zdenek Pavlas - 3.9.1-28 +- Update to latest HEAD. +- Add the "minrate" option. BZ 964298 +- Workaround progress "!!!" end for file:// repos. + * Fri May 17 2013 Zdenek Pavlas - 3.9.1-27 - Update to latest HEAD. - add URLGrabError.code to the external downloader API diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 57f41ff..d53e4c3 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -314,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..37d1297 100644 +index e090e90..05ea9c3 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -345,12 +345,19 @@ index e090e90..37d1297 100644 text = None specifies alternative text to be passed to the progress meter -@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs) +@@ -68,14 +83,20 @@ GENERAL ARGUMENTS (kwargs) (which can be set on default_grabber.throttle) is used. See BANDWIDTH THROTTLING for more information. - timeout = None + timeout = 300 ++ ++ a positive integer expressing the number of seconds to wait before ++ timing out attempts to connect to a server. If the value is None ++ or 0, connection attempts will not time out. The timeout is passed ++ to the underlying pycurl object as its CONNECTTIMEOUT option, see ++ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information. ++ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT - a positive float expressing the number of seconds to wait for socket - operations. If the value is None or 0.0, socket operations will block @@ -358,16 +365,15 @@ index e090e90..37d1297 100644 - method on the Socket object used for the request. See the Python - documentation on settimeout for more information. - http://www.python.org/doc/current/lib/socket-objects.html -+ a positive integer expressing the number of seconds to wait before -+ timing out attempts to connect to a server. If the value is None -+ or 0, connection attempts will not time out. The timeout is passed -+ to the underlying pycurl object as its CONNECTTIMEOUT option, see -+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information. -+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT ++ minrate = 1000 ++ ++ This sets the low speed threshold in bytes per second. If the server ++ is sending data slower than this for at least `timeout' seconds, the ++ library aborts the connection. 
bandwidth = 0 -@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs) +@@ -143,8 +164,12 @@ GENERAL ARGUMENTS (kwargs) note that proxy authentication information may be provided using normal URL constructs: proxies={ 'http' : 'http://user:host@foo:3128' } @@ -382,7 +388,7 @@ index e090e90..37d1297 100644 prefix = None -@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs) +@@ -198,6 +223,12 @@ GENERAL ARGUMENTS (kwargs) control, you should probably subclass URLParser and pass it in via the 'urlparser' option. @@ -395,7 +401,7 @@ index e090e90..37d1297 100644 ssl_ca_cert = None this option can be used if M2Crypto is available and will be -@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs) +@@ -211,43 +242,75 @@ GENERAL ARGUMENTS (kwargs) No-op when using the curl backend (default) @@ -480,7 +486,7 @@ index e090e90..37d1297 100644 RETRY RELATED ARGUMENTS -@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS +@@ -328,6 +391,15 @@ RETRY RELATED ARGUMENTS but it cannot (without severe trickiness) prevent the exception from being raised. @@ -496,7 +502,7 @@ index e090e90..37d1297 100644 interrupt_callback = None This callback is called if KeyboardInterrupt is received at any -@@ -420,6 +486,7 @@ import time +@@ -420,6 +492,7 @@ import time import string import urllib import urllib2 @@ -504,7 +510,7 @@ index e090e90..37d1297 100644 import mimetools import thread import types -@@ -428,9 +495,17 @@ import pycurl +@@ -428,9 +501,17 @@ import pycurl from ftplib import parse150 from StringIO import StringIO from httplib import HTTPException @@ -523,7 +529,7 @@ index e090e90..37d1297 100644 ######################################################################## # MODULE INITIALIZATION ######################################################################## -@@ -439,6 +514,12 @@ try: +@@ -439,6 +520,12 @@ try: except: __version__ = '???' @@ -536,7 +542,7 @@ index e090e90..37d1297 100644 ######################################################################## # functions for debugging output. These functions are here because they # are also part of the module initialization. -@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None): +@@ -504,6 +591,7 @@ def _init_default_logger(logspec=None): else: handler = logging.FileHandler(filename) handler.setFormatter(formatter) DBOBJ = logging.getLogger('urlgrabber') @@ -544,7 +550,7 @@ index e090e90..37d1297 100644 DBOBJ.addHandler(handler) DBOBJ.setLevel(level) except (KeyError, ImportError, ValueError): -@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None): +@@ -512,8 +600,8 @@ def _init_default_logger(logspec=None): def _log_package_state(): if not DEBUG: return @@ -555,7 +561,7 @@ index e090e90..37d1297 100644 _init_default_logger() _log_package_state() -@@ -527,6 +609,29 @@ def _(st): +@@ -527,6 +615,29 @@ def _(st): # END MODULE INITIALIZATION ######################################################################## @@ -585,7 +591,7 @@ index e090e90..37d1297 100644 class URLGrabError(IOError): -@@ -662,6 +767,7 @@ class URLParser: +@@ -662,6 +773,7 @@ class URLParser: opts.quote = 0 --> do not quote it opts.quote = None --> guess """ @@ -593,7 +599,7 @@ index e090e90..37d1297 100644 quote = opts.quote if opts.prefix: -@@ -768,6 +874,41 @@ class URLGrabberOptions: +@@ -768,6 +880,41 @@ class URLGrabberOptions: else: # throttle is a float return self.bandwidth * self.throttle @@ -635,7 +641,7 @@ index e090e90..37d1297 100644 def derive(self, **kwargs): """Create a derived URLGrabberOptions instance. 
This method creates a new instance and overrides the -@@ -791,30 +932,37 @@ class URLGrabberOptions: +@@ -791,30 +938,38 @@ class URLGrabberOptions: provided here. """ self.progress_obj = None @@ -663,6 +669,7 @@ index e090e90..37d1297 100644 self.cache_openers = True - self.timeout = None + self.timeout = 300 ++ self.minrate = None self.text = None self.http_headers = None self.ftp_headers = None @@ -674,7 +681,7 @@ index e090e90..37d1297 100644 self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb self.ssl_context = None # no-op in pycurl self.ssl_verify_peer = True # check peer's cert for authenticityb -@@ -827,6 +975,12 @@ class URLGrabberOptions: +@@ -827,6 +982,12 @@ class URLGrabberOptions: self.size = None # if we know how big the thing we're getting is going # to be. this is ultimately a MAXIMUM size for the file self.max_header_size = 2097152 #2mb seems reasonable for maximum header size @@ -687,7 +694,7 @@ index e090e90..37d1297 100644 def __repr__(self): return self.format() -@@ -846,7 +1000,18 @@ class URLGrabberOptions: +@@ -846,7 +1007,18 @@ class URLGrabberOptions: s = s + indent + '}' return s @@ -707,7 +714,7 @@ index e090e90..37d1297 100644 """Provides easy opening of URLs with a variety of options. All options are specified as kwargs. Options may be specified when -@@ -872,7 +1037,6 @@ class URLGrabber: +@@ -872,7 +1044,6 @@ class URLGrabber: # beware of infinite loops :) tries = tries + 1 exception = None @@ -715,7 +722,7 @@ index e090e90..37d1297 100644 callback = None if DEBUG: DEBUG.info('attempt %i/%s: %s', tries, opts.retry, args[0]) -@@ -883,54 +1047,62 @@ class URLGrabber: +@@ -883,54 +1054,62 @@ class URLGrabber: except URLGrabError, e: exception = e callback = opts.failure_callback @@ -785,7 +792,7 @@ index e090e90..37d1297 100644 if scheme == 'file' and not opts.copy_local: # just return the name of the local file - don't make a # copy currently -@@ -950,41 +1122,51 @@ class URLGrabber: +@@ -950,41 +1129,51 @@ class URLGrabber: elif not opts.range: if not opts.checkfunc is None: @@ -852,7 +859,7 @@ index e090e90..37d1297 100644 if limit is not None: limit = limit + 1 -@@ -1000,12 +1182,8 @@ class URLGrabber: +@@ -1000,12 +1189,8 @@ class URLGrabber: else: s = fo.read(limit) if not opts.checkfunc is None: @@ -867,7 +874,7 @@ index e090e90..37d1297 100644 finally: fo.close() return s -@@ -1020,6 +1198,7 @@ class URLGrabber: +@@ -1020,6 +1205,7 @@ class URLGrabber: return s def _make_callback(self, callback_obj): @@ -875,7 +882,7 @@ index e090e90..37d1297 100644 if callable(callback_obj): return callback_obj, (), {} else: -@@ -1030,7 +1209,7 @@ class URLGrabber: +@@ -1030,7 +1216,7 @@ class URLGrabber: default_grabber = URLGrabber() @@ -884,7 +891,7 @@ index e090e90..37d1297 100644 def __init__(self, url, filename, opts): self.fo = None self._hdr_dump = '' -@@ -1052,10 +1231,13 @@ class PyCurlFileObject(): +@@ -1052,10 +1238,13 @@ class PyCurlFileObject(): self._reget_length = 0 self._prog_running = False self._error = (None, None) @@ -900,7 +907,7 @@ index e090e90..37d1297 100644 def __getattr__(self, name): """This effectively allows us to wrap at the instance level. 
Any attribute not found in _this_ object will be searched for -@@ -1067,6 +1249,12 @@ class PyCurlFileObject(): +@@ -1067,6 +1256,12 @@ class PyCurlFileObject(): def _retrieve(self, buf): try: @@ -913,7 +920,7 @@ index e090e90..37d1297 100644 if not self._prog_running: if self.opts.progress_obj: size = self.size + self._reget_length -@@ -1079,23 +1267,40 @@ class PyCurlFileObject(): +@@ -1079,23 +1274,40 @@ class PyCurlFileObject(): self.opts.progress_obj.update(self._amount_read) self._amount_read += len(buf) @@ -960,7 +967,7 @@ index e090e90..37d1297 100644 elif self.scheme in ['ftp']: s = None if buf.startswith('213 '): -@@ -1104,7 +1309,18 @@ class PyCurlFileObject(): +@@ -1104,7 +1316,18 @@ class PyCurlFileObject(): s = parse150(buf) if s: self.size = int(s) @@ -980,7 +987,7 @@ index e090e90..37d1297 100644 return len(buf) except KeyboardInterrupt: return pycurl.READFUNC_ABORT -@@ -1113,8 +1329,10 @@ class PyCurlFileObject(): +@@ -1113,8 +1336,10 @@ class PyCurlFileObject(): if self._parsed_hdr: return self._parsed_hdr statusend = self._hdr_dump.find('\n') @@ -991,7 +998,7 @@ index e090e90..37d1297 100644 self._parsed_hdr = mimetools.Message(hdrfp) return self._parsed_hdr -@@ -1127,6 +1345,9 @@ class PyCurlFileObject(): +@@ -1127,6 +1352,9 @@ class PyCurlFileObject(): if not opts: opts = self.opts @@ -1001,7 +1008,7 @@ index e090e90..37d1297 100644 # defaults we're always going to set self.curl_obj.setopt(pycurl.NOPROGRESS, False) -@@ -1136,11 +1357,21 @@ class PyCurlFileObject(): +@@ -1136,11 +1364,21 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) self.curl_obj.setopt(pycurl.FAILONERROR, True) self.curl_obj.setopt(pycurl.OPT_FILETIME, True) @@ -1024,7 +1031,7 @@ index e090e90..37d1297 100644 # maybe to be options later self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) -@@ -1148,9 +1379,11 @@ class PyCurlFileObject(): +@@ -1148,9 +1386,11 @@ class PyCurlFileObject(): # timeouts timeout = 300 @@ -1034,12 +1041,12 @@ index e090e90..37d1297 100644 + if hasattr(opts, 'timeout'): + timeout = int(opts.timeout or 0) + self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout) -+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000) ++ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000) + self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout) # ssl options if self.scheme == 'https': -@@ -1158,13 +1391,16 @@ class PyCurlFileObject(): +@@ -1158,13 +1398,16 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) @@ -1057,7 +1064,7 @@ index e090e90..37d1297 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1423,26 @@ class PyCurlFileObject(): +@@ -1187,28 +1430,26 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1102,7 +1109,7 @@ index e090e90..37d1297 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1462,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1469,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1148,7 +1155,7 @@ index e090e90..37d1297 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,36 +1490,70 
@@ class PyCurlFileObject(): +@@ -1269,36 +1497,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1244,7 +1251,7 @@ index e090e90..37d1297 100644 def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1588,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1595,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1257,7 +1264,7 @@ index e090e90..37d1297 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1693,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1700,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1311,7 +1318,7 @@ index e090e90..37d1297 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1810,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1817,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1337,7 +1344,7 @@ index e090e90..37d1297 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1831,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1838,6 @@ class PyCurlFileObject(): return True return False @@ -1351,7 +1358,7 @@ index e090e90..37d1297 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1862,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1869,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1374,7 +1381,7 @@ index e090e90..37d1297 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### @@ -1480,7 +1487,7 @@ index e090e90..37d1297 100644 + # list of options we pass to downloader + _options = ( + 'url', 'filename', -+ 'timeout', 'close_connection', 'keepalive', ++ 'timeout', 'minrate', 'close_connection', 'keepalive', + 'throttle', 'bandwidth', 'range', 'reget', + 'user_agent', 'http_headers', 'ftp_headers', + 'proxy', 'prefix', 'username', 'password', @@ -2022,7 +2029,7 @@ index dad410b..7975f1b 100644 def urlopen(self, url, **kwargs): kw = dict(kwargs) diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py -index dd07c6a..077fd99 100644 +index dd07c6a..b456a0c 100644 --- a/urlgrabber/progress.py +++ b/urlgrabber/progress.py @@ -133,8 +133,8 @@ class BaseMeter: @@ -2048,7 +2055,7 @@ index dd07c6a..077fd99 100644 + return tl.add(' [%-*.*s]' % (blen, blen, bar)) + +def _term_add_end(tl, osize, size): -+ if osize is not None: ++ if osize: # osize should be None or >0, but that's been broken. + if size > osize: # Is ??? better? Really need something to say < vs >. + return tl.add(' !!! '), True + elif size != osize:
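
The status line that PATCH 2/3 extends is the single point of contact between
urlgrabber-ext-down and the parent process: on success the downloader writes
"_id size dlsz dltm OK", on failure "_id size dlsz dltm errno code strerror",
and the parent recovers the HTTP status via URLGrabError.code whenever the code
field is non-zero (the 503 test case above exercises exactly that). Below is a
minimal sketch of both ends of that wire format, assuming the field layout shown
in the patch; the helper names are hypothetical, and the real parser additionally
accepts two-field "_id size" progress updates that this sketch omits.

# Sketch of the urlgrabber-ext-down status line added in PATCH 2/3.
# Field layout (from the patch): "_id size dlsz dltm ug_err\n" where
# ug_err is either "OK" or "errno code strerror".  Helper names are
# hypothetical; only the format itself comes from the patch.

def format_status(_id, size, dlsz, dltm, exc=None):
    if exc is None:
        ug_err = 'OK'
    else:
        # getattr() keeps the format stable when no HTTP code was set
        ug_err = '%d %d %s' % (exc.errno, getattr(exc, 'code', 0), exc.strerror)
    return '%d %d %d %.3f %s\n' % (_id, size, dlsz, dltm, ug_err)

def parse_status(line):
    # maxsplit=6 keeps spaces inside strerror intact
    fields = line.rstrip('\n').split(' ', 6)
    _id, size = int(fields[0]), int(fields[1])
    dlsz, dltm = int(fields[2]), float(fields[3])
    if fields[4] == 'OK':
        return _id, size, dlsz, dltm, None
    err = IOError(int(fields[4]), fields[6])   # stands in for URLGrabError
    if fields[5] != '0':
        err.code = int(fields[5])              # e.g. 503 from the test case
    return _id, size, dlsz, dltm, err

Splitting with maxsplit=6 is what lets strerror contain spaces, which is why the
new code field had to be inserted before strerror rather than appended after it.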
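
BZ 919076, fixed in PATCH 1/3, covers servers that ignore a Range: request and
answer "200 OK" with the full body instead of "206 Partial Content": the
partially downloaded file must then be truncated and the reget bookkeeping
reset, otherwise the old bytes would sit in front of a second full copy. A
simplified, standalone sketch of that header-callback decision follows, using
the same attribute names as the patch; the class is a stand-in for illustration,
not the real PyCurlFileObject.

# Sketch of the BZ 919076 fix from PATCH 1/3: when a reget was requested
# but the server replies "200 OK" (full body) instead of "206 Partial
# Content", drop the partial data or it would be duplicated.  Attribute
# names mirror the patch; the class itself is a simplified stand-in.

class RegetState(object):
    def __init__(self, fo, reget_length):
        self.fo = fo                      # file opened for append by the reget
        self.append = True                # _build_range() requested a reget
        self.reget_time = None
        self._amount_read = reget_length  # progress already counts old bytes
        self._reget_length = reget_length
        self._hdr_dump = ''

    def on_header_line(self, buf):
        # Only the status line can match: _hdr_dump is empty just once.
        if self.append and self._hdr_dump == '' and ' 200 ' in buf:
            # Server ignored the Range header: undo what _build_range() did
            self.append = False
            self.reget_time = None
            self._amount_read = 0
            self._reget_length = 0
            self.fo.truncate(0)           # discard the partial download
        self._hdr_dump += buf

Checking self._hdr_dump == '' restricts the test to the first header line, so a
stray " 200 " inside some later header value cannot trigger the reset.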
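
The minrate option from PATCH 3/3 (BZ 964298) maps directly onto two libcurl
options: LOW_SPEED_LIMIT is set to "opts.minrate or 1000" bytes per second and
LOW_SPEED_TIME to the same value as the connect timeout, so curl aborts once the
transfer rate stays below the threshold for that many seconds. A minimal pycurl
sketch of that pairing, assuming placeholder URL and values; error handling is
reduced to the stall/timeout case.

# Sketch of the "minrate" option added in PATCH 3/3 (BZ 964298).
# curl aborts the transfer once the rate stays below LOW_SPEED_LIMIT
# (bytes/s) for LOW_SPEED_TIME seconds, mirroring what _set_opts() does:
#   setopt(LOW_SPEED_LIMIT, opts.minrate or 1000)
#   setopt(LOW_SPEED_TIME, timeout)
import pycurl

def fetch(url, out_path, minrate=None, timeout=300):
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.CONNECTTIMEOUT, timeout)
    c.setopt(pycurl.LOW_SPEED_LIMIT, minrate or 1000)  # patch's default floor
    c.setopt(pycurl.LOW_SPEED_TIME, timeout)
    f = open(out_path, 'wb')
    try:
        c.setopt(pycurl.WRITEDATA, f)
        c.perform()   # raises pycurl.error (E_OPERATION_TIMEDOUT)
    finally:          # if the server stalls below minrate for `timeout' s
        f.close()
        c.close()

Reusing the connect timeout for LOW_SPEED_TIME means one knob bounds both the
connect phase and a stalled transfer, which matches the timeout docstring the
patch adds to grabber.py.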
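
The proxy-auth workaround (BZ 769254, BZ 960163) went through two spellings in
this series: first "auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE" behind a
"pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0)" check (that expression
encodes curl 7.28.0), then the unconditional
"pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE" in PATCH 3/3. The
subtraction is equivalent to masking the bit out only because the GSSNEGOTIATE
bit is guaranteed set in HTTPAUTH_ANY; a short sanity check, assuming a pycurl
build that exposes both constants:

# Why "HTTPAUTH_ANY - HTTPAUTH_GSSNEGOTIATE" equals masking the bit out:
# ANY is a bitmask with the GSSNEGOTIATE bit set, so plain subtraction
# clears exactly that bit.  Were the bit not set, subtraction would
# corrupt the mask, which is why "& ~" is the more defensive spelling
# used in the intermediate version of this patch.
import pycurl

any_auth = pycurl.HTTPAUTH_ANY
gss = pycurl.HTTPAUTH_GSSNEGOTIATE
assert any_auth & gss                       # the bit is present in ANY
assert any_auth - gss == any_auth & ~gss    # so both spellings agree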