From de85533e85cd402e6e133bb87837b49d62a87773 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Wed, 27 Mar 2013 10:55:23 +0100 Subject: [PATCH 1/3] Update to latest HEAD --- python-urlgrabber.spec | 7 ++++- urlgrabber-HEAD.patch | 64 +++++++++++++++++++++++++----------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 43666d9..fa7dc7d 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 25%{?dist} +Release: 26%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Wed Mar 27 2013 Zdenek Pavlas - 3.9.1-26 +- Update to latest HEAD. +- Handle HTTP 200 response to range requests correctly. BZ 919076 +- Reset curl_obj to clear CURLOPT_RANGE from previous requests. BZ 923951 + * Thu Mar 7 2013 Zdeněk Pavlas - 3.9.1-25 - Update to latest HEAD. - fix some test cases that were failing. BZ 918658 diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 4633455..8947982 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -275,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..1afb2c5 100644 +index e090e90..9526dc1 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -874,7 +874,7 @@ index e090e90..1afb2c5 100644 if not self._prog_running: if self.opts.progress_obj: size = self.size + self._reget_length -@@ -1079,15 +1267,24 @@ class PyCurlFileObject(): +@@ -1079,23 +1267,40 @@ class PyCurlFileObject(): self.opts.progress_obj.update(self._amount_read) self._amount_read += len(buf) @@ -899,9 +899,29 @@ index e090e90..1afb2c5 100644 - return -1 + return -1 try: - self._hdr_dump += buf +- self._hdr_dump += buf # we have to get the size before we do the progress obj start -@@ -1104,7 +1301,17 @@ class PyCurlFileObject(): + # but we can't do that w/o making it do 2 connects, which sucks + # so we cheat and stuff it in here in the hdr_retrieve +- if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1: +- length = buf.split(':')[1] +- self.size = int(length) ++ if self.scheme in ['http','https']: ++ if buf.lower().find('content-length') != -1: ++ length = buf.split(':')[1] ++ self.size = int(length) ++ elif self.append and self._hdr_dump == '' and ' 200 ' in buf: ++ # reget was attempted but server sends it all ++ # undo what we did in _build_range() ++ self.append = False ++ self.reget_time = None ++ self._amount_read = 0 ++ self._reget_length = 0 ++ self.fo.truncate(0) + elif self.scheme in ['ftp']: + s = None + if buf.startswith('213 '): +@@ -1104,7 +1309,18 @@ class PyCurlFileObject(): s = parse150(buf) if s: self.size = int(s) @@ -913,6 +933,7 @@ index e090e90..1afb2c5 100644 + self.scheme = urlparse.urlsplit(location)[0] + self.url = location + ++ self._hdr_dump += buf + if len(self._hdr_dump) != 0 and buf == '\r\n': + self._hdr_ended = True + if DEBUG: DEBUG.debug('header ended:') @@ -920,7 +941,7 @@ index e090e90..1afb2c5 100644 return len(buf) except KeyboardInterrupt: return pycurl.READFUNC_ABORT -@@ -1113,8 +1320,10 @@ class PyCurlFileObject(): +@@ -1113,8 +1329,10 @@ class PyCurlFileObject(): if self._parsed_hdr: return self._parsed_hdr statusend = self._hdr_dump.find('\n') @@ 
-931,7 +952,7 @@ index e090e90..1afb2c5 100644 self._parsed_hdr = mimetools.Message(hdrfp) return self._parsed_hdr -@@ -1127,6 +1336,9 @@ class PyCurlFileObject(): +@@ -1127,6 +1345,9 @@ class PyCurlFileObject(): if not opts: opts = self.opts @@ -941,7 +962,7 @@ index e090e90..1afb2c5 100644 # defaults we're always going to set self.curl_obj.setopt(pycurl.NOPROGRESS, False) -@@ -1136,11 +1348,21 @@ class PyCurlFileObject(): +@@ -1136,11 +1357,21 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) self.curl_obj.setopt(pycurl.FAILONERROR, True) self.curl_obj.setopt(pycurl.OPT_FILETIME, True) @@ -964,7 +985,7 @@ index e090e90..1afb2c5 100644 # maybe to be options later self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) -@@ -1148,9 +1370,11 @@ class PyCurlFileObject(): +@@ -1148,9 +1379,11 @@ class PyCurlFileObject(): # timeouts timeout = 300 @@ -979,7 +1000,7 @@ index e090e90..1afb2c5 100644 # ssl options if self.scheme == 'https': -@@ -1158,13 +1382,16 @@ class PyCurlFileObject(): +@@ -1158,13 +1391,16 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) @@ -997,7 +1018,7 @@ index e090e90..1afb2c5 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1414,28 @@ class PyCurlFileObject(): +@@ -1187,28 +1423,28 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1044,7 +1065,7 @@ index e090e90..1afb2c5 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1455,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1464,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1090,7 +1111,7 @@ index e090e90..1afb2c5 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,40 +1483,76 @@ class PyCurlFileObject(): +@@ -1269,36 +1492,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1186,14 +1207,7 @@ index e090e90..1afb2c5 100644 def _do_open(self): self.curl_obj = _curl_cache -- self.curl_obj.reset() # reset all old settings away, just in case -+ # reset() clears PYCURL_ERRORBUFFER, and there's no way -+ # to reinitialize it, so better don't do that. 
BZ 896025 -+ #self.curl_obj.reset() # reset all old settings away, just in case - # setup any ranges - self._set_opts() - self._do_grab() -@@ -1333,7 +1583,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1590,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1206,7 +1220,7 @@ index e090e90..1afb2c5 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1688,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1695,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1260,7 +1274,7 @@ index e090e90..1afb2c5 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1805,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1812,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1286,7 +1300,7 @@ index e090e90..1afb2c5 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1826,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1833,6 @@ class PyCurlFileObject(): return True return False @@ -1300,7 +1314,7 @@ index e090e90..1afb2c5 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1857,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1864,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1323,7 +1337,7 @@ index e090e90..1afb2c5 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### From bc8c777de15f6cfcf4a9c60808fb64db794d3c67 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Fri, 17 May 2013 10:12:39 +0200 Subject: [PATCH 2/3] Update to latest HEAD. --- python-urlgrabber.spec | 7 +++- urlgrabber-HEAD.patch | 79 +++++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index fa7dc7d..0531d94 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 26%{?dist} +Release: 27%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri May 17 2013 Zdenek Pavlas - 3.9.1-27 +- Update to latest HEAD. +- add URLGrabError.code to the external downloader API +- Disable GSSNEGOTIATE to work around a curl bug. BZ 960163 + * Wed Mar 27 2013 Zdenek Pavlas - 3.9.1-26 - Update to latest HEAD. - Handle HTTP 200 response to range requests correctly. 
BZ 919076 diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 8947982..57f41ff 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -81,7 +81,7 @@ index 518e512..07881b3 100644 try: diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down new file mode 100755 -index 0000000..3dafb12 +index 0000000..9ea0e70 --- /dev/null +++ b/scripts/urlgrabber-ext-down @@ -0,0 +1,75 @@ @@ -155,7 +155,7 @@ index 0000000..3dafb12 + ug_err = 'OK' + except URLGrabError, e: + size = 0 -+ ug_err = '%d %s' % e.args ++ ug_err = '%d %d %s' % (e.errno, getattr(e, 'code', 0), e.strerror) + write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err) + +if __name__ == '__main__': @@ -190,7 +190,7 @@ index 50c6348..5fb43f9 100644 # set to a proftp server only. we're working around a couple of diff --git a/test/test_mirror.py b/test/test_mirror.py -index 70fe069..cb63a41 100644 +index 70fe069..6fdb668 100644 --- a/test/test_mirror.py +++ b/test/test_mirror.py @@ -28,7 +28,7 @@ import os @@ -220,6 +220,45 @@ index 70fe069..cb63a41 100644 def urlgrab(self, url, filename=None, **kwargs): self.calls.append( (url, filename) ) +@@ -265,6 +269,38 @@ class ActionTests(TestCase): + self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) + + ++class HttpReplyCode(TestCase): ++ def setUp(self): ++ def server(): ++ import socket ++ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) ++ s.bind(('localhost', 2000)); s.listen(1) ++ while 1: ++ c, a = s.accept() ++ while not c.recv(4096).endswith('\r\n\r\n'): pass ++ c.sendall('HTTP/1.1 %d %s\r\n' % self.reply) ++ c.close() ++ import thread ++ self.reply = 503, "Busy" ++ thread.start_new_thread(server, ()) ++ ++ def failure(obj): ++ self.code = getattr(obj.exception, 'code', None) ++ return {} ++ self.g = URLGrabber() ++ self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure) ++ ++ def test_grab(self): ++ self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo') ++ self.assertEquals(self.code, 503); del self.code ++ ++ err = [] ++ self.mg.urlgrab('foo', async = True, failfunc = err.append) ++ urlgrabber.grabber.parallel_wait() ++ self.assertEquals([e.exception.errno for e in err], [256]) ++ self.assertEquals(self.code, 503); del self.code ++ + def suite(): + tl = TestLoader() + return tl.loadTestsFromModule(sys.modules[__name__]) diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py index 3e5f3b7..8eeaeda 100644 --- a/urlgrabber/byterange.py @@ -275,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..9526dc1 100644 +index e090e90..37d1297 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -1018,7 +1057,7 @@ index e090e90..9526dc1 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1423,28 @@ class PyCurlFileObject(): +@@ -1187,28 +1423,26 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1042,11 +1081,9 @@ index e090e90..9526dc1 100644 + # proxy + if opts.proxy is not None: + self.curl_obj.setopt(pycurl.PROXY, opts.proxy) -+ auth = pycurl.HTTPAUTH_ANY -+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0): -+ # BZ 769254: work around a bug in curl < 7.28.0 -+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE -+ 
self.curl_obj.setopt(pycurl.PROXYAUTH, auth) ++ self.curl_obj.setopt(pycurl.PROXYAUTH, ++ # All but Kerberos. BZ 769254 ++ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE) + + if opts.username and opts.password: + if self.scheme in ('http', 'https'): @@ -1065,7 +1102,7 @@ index e090e90..9526dc1 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1464,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1462,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1111,7 +1148,7 @@ index e090e90..9526dc1 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,36 +1492,70 @@ class PyCurlFileObject(): +@@ -1269,36 +1490,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1207,7 +1244,7 @@ index e090e90..9526dc1 100644 def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1590,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1588,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1220,7 +1257,7 @@ index e090e90..9526dc1 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1695,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1693,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1274,7 +1311,7 @@ index e090e90..9526dc1 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1812,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1810,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1300,7 +1337,7 @@ index e090e90..9526dc1 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1833,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1831,6 @@ class PyCurlFileObject(): return True return False @@ -1314,7 +1351,7 @@ index e090e90..9526dc1 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1864,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1862,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1337,7 +1374,7 @@ index e090e90..9526dc1 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### @@ -1478,7 +1515,7 @@ index e090e90..9526dc1 100644 + raise KeyboardInterrupt + for line in lines: + # parse downloader output -+ line = line.split(' ', 5) ++ line = line.split(' ', 6) + _id, size = map(int, line[:2]) + if len(line) == 2: + self.running[_id]._progress.update(size) @@ -1489,7 +1526,9 @@ index e090e90..9526dc1 100644 + ug_err = None + if DEBUG: DEBUG.info('success') + else: -+ ug_err = URLGrabError(int(line[4]), line[5]) ++ ug_err = URLGrabError(int(line[4]), line[6]) ++ if line[5] != '0': ++ ug_err.code = int(line[5]) + if DEBUG: DEBUG.info('failure: %s', ug_err) + _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0]) + ret.append((opts, size, ug_err)) From b619025a751cf502f8dafb0fd03e63ec305039c6 Mon Sep 17 00:00:00 2001 From: Zdenek Pavlas Date: Mon, 20 May 2013 16:37:23 +0200 Subject: [PATCH 3/3] Update to latest HEAD --- 
python-urlgrabber.spec | 7 ++- urlgrabber-HEAD.patch | 109 ++++++++++++++++++++++------------------- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 0531d94..703b0d0 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 27%{?dist} +Release: 28%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch @@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri May 17 2013 Zdenek Pavlas - 3.9.1-28 +- Update to latest HEAD. +- Add the "minrate" option. BZ 964298 +- Workaround progress "!!!" end for file:// repos. + * Fri May 17 2013 Zdenek Pavlas - 3.9.1-27 - Update to latest HEAD. - add URLGrabError.code to the external downloader API diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index 57f41ff..d53e4c3 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -314,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..37d1297 100644 +index e090e90..05ea9c3 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) @@ -345,12 +345,19 @@ index e090e90..37d1297 100644 text = None specifies alternative text to be passed to the progress meter -@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs) +@@ -68,14 +83,20 @@ GENERAL ARGUMENTS (kwargs) (which can be set on default_grabber.throttle) is used. See BANDWIDTH THROTTLING for more information. - timeout = None + timeout = 300 ++ ++ a positive integer expressing the number of seconds to wait before ++ timing out attempts to connect to a server. If the value is None ++ or 0, connection attempts will not time out. The timeout is passed ++ to the underlying pycurl object as its CONNECTTIMEOUT option, see ++ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information. ++ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT - a positive float expressing the number of seconds to wait for socket - operations. If the value is None or 0.0, socket operations will block @@ -358,16 +365,15 @@ index e090e90..37d1297 100644 - method on the Socket object used for the request. See the Python - documentation on settimeout for more information. - http://www.python.org/doc/current/lib/socket-objects.html -+ a positive integer expressing the number of seconds to wait before -+ timing out attempts to connect to a server. If the value is None -+ or 0, connection attempts will not time out. The timeout is passed -+ to the underlying pycurl object as its CONNECTTIMEOUT option, see -+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information. -+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT ++ minrate = 1000 ++ ++ This sets the low speed threshold in bytes per second. If the server ++ is sending data slower than this for at least `timeout' seconds, the ++ library aborts the connection. 
bandwidth = 0 -@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs) +@@ -143,8 +164,12 @@ GENERAL ARGUMENTS (kwargs) note that proxy authentication information may be provided using normal URL constructs: proxies={ 'http' : 'http://user:host@foo:3128' } @@ -382,7 +388,7 @@ index e090e90..37d1297 100644 prefix = None -@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs) +@@ -198,6 +223,12 @@ GENERAL ARGUMENTS (kwargs) control, you should probably subclass URLParser and pass it in via the 'urlparser' option. @@ -395,7 +401,7 @@ index e090e90..37d1297 100644 ssl_ca_cert = None this option can be used if M2Crypto is available and will be -@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs) +@@ -211,43 +242,75 @@ GENERAL ARGUMENTS (kwargs) No-op when using the curl backend (default) @@ -480,7 +486,7 @@ index e090e90..37d1297 100644 RETRY RELATED ARGUMENTS -@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS +@@ -328,6 +391,15 @@ RETRY RELATED ARGUMENTS but it cannot (without severe trickiness) prevent the exception from being raised. @@ -496,7 +502,7 @@ index e090e90..37d1297 100644 interrupt_callback = None This callback is called if KeyboardInterrupt is received at any -@@ -420,6 +486,7 @@ import time +@@ -420,6 +492,7 @@ import time import string import urllib import urllib2 @@ -504,7 +510,7 @@ index e090e90..37d1297 100644 import mimetools import thread import types -@@ -428,9 +495,17 @@ import pycurl +@@ -428,9 +501,17 @@ import pycurl from ftplib import parse150 from StringIO import StringIO from httplib import HTTPException @@ -523,7 +529,7 @@ index e090e90..37d1297 100644 ######################################################################## # MODULE INITIALIZATION ######################################################################## -@@ -439,6 +514,12 @@ try: +@@ -439,6 +520,12 @@ try: except: __version__ = '???' @@ -536,7 +542,7 @@ index e090e90..37d1297 100644 ######################################################################## # functions for debugging output. These functions are here because they # are also part of the module initialization. -@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None): +@@ -504,6 +591,7 @@ def _init_default_logger(logspec=None): else: handler = logging.FileHandler(filename) handler.setFormatter(formatter) DBOBJ = logging.getLogger('urlgrabber') @@ -544,7 +550,7 @@ index e090e90..37d1297 100644 DBOBJ.addHandler(handler) DBOBJ.setLevel(level) except (KeyError, ImportError, ValueError): -@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None): +@@ -512,8 +600,8 @@ def _init_default_logger(logspec=None): def _log_package_state(): if not DEBUG: return @@ -555,7 +561,7 @@ index e090e90..37d1297 100644 _init_default_logger() _log_package_state() -@@ -527,6 +609,29 @@ def _(st): +@@ -527,6 +615,29 @@ def _(st): # END MODULE INITIALIZATION ######################################################################## @@ -585,7 +591,7 @@ index e090e90..37d1297 100644 class URLGrabError(IOError): -@@ -662,6 +767,7 @@ class URLParser: +@@ -662,6 +773,7 @@ class URLParser: opts.quote = 0 --> do not quote it opts.quote = None --> guess """ @@ -593,7 +599,7 @@ index e090e90..37d1297 100644 quote = opts.quote if opts.prefix: -@@ -768,6 +874,41 @@ class URLGrabberOptions: +@@ -768,6 +880,41 @@ class URLGrabberOptions: else: # throttle is a float return self.bandwidth * self.throttle @@ -635,7 +641,7 @@ index e090e90..37d1297 100644 def derive(self, **kwargs): """Create a derived URLGrabberOptions instance. 
This method creates a new instance and overrides the -@@ -791,30 +932,37 @@ class URLGrabberOptions: +@@ -791,30 +938,38 @@ class URLGrabberOptions: provided here. """ self.progress_obj = None @@ -663,6 +669,7 @@ index e090e90..37d1297 100644 self.cache_openers = True - self.timeout = None + self.timeout = 300 ++ self.minrate = None self.text = None self.http_headers = None self.ftp_headers = None @@ -674,7 +681,7 @@ index e090e90..37d1297 100644 self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb self.ssl_context = None # no-op in pycurl self.ssl_verify_peer = True # check peer's cert for authenticityb -@@ -827,6 +975,12 @@ class URLGrabberOptions: +@@ -827,6 +982,12 @@ class URLGrabberOptions: self.size = None # if we know how big the thing we're getting is going # to be. this is ultimately a MAXIMUM size for the file self.max_header_size = 2097152 #2mb seems reasonable for maximum header size @@ -687,7 +694,7 @@ index e090e90..37d1297 100644 def __repr__(self): return self.format() -@@ -846,7 +1000,18 @@ class URLGrabberOptions: +@@ -846,7 +1007,18 @@ class URLGrabberOptions: s = s + indent + '}' return s @@ -707,7 +714,7 @@ index e090e90..37d1297 100644 """Provides easy opening of URLs with a variety of options. All options are specified as kwargs. Options may be specified when -@@ -872,7 +1037,6 @@ class URLGrabber: +@@ -872,7 +1044,6 @@ class URLGrabber: # beware of infinite loops :) tries = tries + 1 exception = None @@ -715,7 +722,7 @@ index e090e90..37d1297 100644 callback = None if DEBUG: DEBUG.info('attempt %i/%s: %s', tries, opts.retry, args[0]) -@@ -883,54 +1047,62 @@ class URLGrabber: +@@ -883,54 +1054,62 @@ class URLGrabber: except URLGrabError, e: exception = e callback = opts.failure_callback @@ -785,7 +792,7 @@ index e090e90..37d1297 100644 if scheme == 'file' and not opts.copy_local: # just return the name of the local file - don't make a # copy currently -@@ -950,41 +1122,51 @@ class URLGrabber: +@@ -950,41 +1129,51 @@ class URLGrabber: elif not opts.range: if not opts.checkfunc is None: @@ -852,7 +859,7 @@ index e090e90..37d1297 100644 if limit is not None: limit = limit + 1 -@@ -1000,12 +1182,8 @@ class URLGrabber: +@@ -1000,12 +1189,8 @@ class URLGrabber: else: s = fo.read(limit) if not opts.checkfunc is None: @@ -867,7 +874,7 @@ index e090e90..37d1297 100644 finally: fo.close() return s -@@ -1020,6 +1198,7 @@ class URLGrabber: +@@ -1020,6 +1205,7 @@ class URLGrabber: return s def _make_callback(self, callback_obj): @@ -875,7 +882,7 @@ index e090e90..37d1297 100644 if callable(callback_obj): return callback_obj, (), {} else: -@@ -1030,7 +1209,7 @@ class URLGrabber: +@@ -1030,7 +1216,7 @@ class URLGrabber: default_grabber = URLGrabber() @@ -884,7 +891,7 @@ index e090e90..37d1297 100644 def __init__(self, url, filename, opts): self.fo = None self._hdr_dump = '' -@@ -1052,10 +1231,13 @@ class PyCurlFileObject(): +@@ -1052,10 +1238,13 @@ class PyCurlFileObject(): self._reget_length = 0 self._prog_running = False self._error = (None, None) @@ -900,7 +907,7 @@ index e090e90..37d1297 100644 def __getattr__(self, name): """This effectively allows us to wrap at the instance level. 
Any attribute not found in _this_ object will be searched for -@@ -1067,6 +1249,12 @@ class PyCurlFileObject(): +@@ -1067,6 +1256,12 @@ class PyCurlFileObject(): def _retrieve(self, buf): try: @@ -913,7 +920,7 @@ index e090e90..37d1297 100644 if not self._prog_running: if self.opts.progress_obj: size = self.size + self._reget_length -@@ -1079,23 +1267,40 @@ class PyCurlFileObject(): +@@ -1079,23 +1274,40 @@ class PyCurlFileObject(): self.opts.progress_obj.update(self._amount_read) self._amount_read += len(buf) @@ -960,7 +967,7 @@ index e090e90..37d1297 100644 elif self.scheme in ['ftp']: s = None if buf.startswith('213 '): -@@ -1104,7 +1309,18 @@ class PyCurlFileObject(): +@@ -1104,7 +1316,18 @@ class PyCurlFileObject(): s = parse150(buf) if s: self.size = int(s) @@ -980,7 +987,7 @@ index e090e90..37d1297 100644 return len(buf) except KeyboardInterrupt: return pycurl.READFUNC_ABORT -@@ -1113,8 +1329,10 @@ class PyCurlFileObject(): +@@ -1113,8 +1336,10 @@ class PyCurlFileObject(): if self._parsed_hdr: return self._parsed_hdr statusend = self._hdr_dump.find('\n') @@ -991,7 +998,7 @@ index e090e90..37d1297 100644 self._parsed_hdr = mimetools.Message(hdrfp) return self._parsed_hdr -@@ -1127,6 +1345,9 @@ class PyCurlFileObject(): +@@ -1127,6 +1352,9 @@ class PyCurlFileObject(): if not opts: opts = self.opts @@ -1001,7 +1008,7 @@ index e090e90..37d1297 100644 # defaults we're always going to set self.curl_obj.setopt(pycurl.NOPROGRESS, False) -@@ -1136,11 +1357,21 @@ class PyCurlFileObject(): +@@ -1136,11 +1364,21 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) self.curl_obj.setopt(pycurl.FAILONERROR, True) self.curl_obj.setopt(pycurl.OPT_FILETIME, True) @@ -1024,7 +1031,7 @@ index e090e90..37d1297 100644 # maybe to be options later self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) -@@ -1148,9 +1379,11 @@ class PyCurlFileObject(): +@@ -1148,9 +1386,11 @@ class PyCurlFileObject(): # timeouts timeout = 300 @@ -1034,12 +1041,12 @@ index e090e90..37d1297 100644 + if hasattr(opts, 'timeout'): + timeout = int(opts.timeout or 0) + self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout) -+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000) ++ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000) + self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout) # ssl options if self.scheme == 'https': -@@ -1158,13 +1391,16 @@ class PyCurlFileObject(): +@@ -1158,13 +1398,16 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) @@ -1057,7 +1064,7 @@ index e090e90..37d1297 100644 if opts.ssl_cert_type: self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) if opts.ssl_key_pass: -@@ -1187,28 +1423,26 @@ class PyCurlFileObject(): +@@ -1187,28 +1430,26 @@ class PyCurlFileObject(): if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) @@ -1102,7 +1109,7 @@ index e090e90..37d1297 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,39 +1462,26 @@ class PyCurlFileObject(): +@@ -1228,39 +1469,26 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -1148,7 +1155,7 @@ index e090e90..37d1297 100644 # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1269,36 +1490,70 
@@ class PyCurlFileObject(): +@@ -1269,36 +1497,70 @@ class PyCurlFileObject(): # figure out what aborted the pycurl process FIXME raise KeyboardInterrupt @@ -1244,7 +1251,7 @@ index e090e90..37d1297 100644 def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1588,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1595,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -1257,7 +1264,7 @@ index e090e90..37d1297 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,21 +1693,46 @@ class PyCurlFileObject(): +@@ -1434,21 +1700,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') @@ -1311,7 +1318,7 @@ index e090e90..37d1297 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1526,17 +1810,20 @@ class PyCurlFileObject(): +@@ -1526,17 +1817,20 @@ class PyCurlFileObject(): if self._prog_running: downloaded += self._reget_length self.opts.progress_obj.update(downloaded) @@ -1337,7 +1344,7 @@ index e090e90..37d1297 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1831,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1838,6 @@ class PyCurlFileObject(): return True return False @@ -1351,7 +1358,7 @@ index e090e90..37d1297 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1862,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1869,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -1374,7 +1381,7 @@ index e090e90..37d1297 100644 ##################################################################### # DEPRECATED FUNCTIONS -@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, +@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, ##################################################################### @@ -1480,7 +1487,7 @@ index e090e90..37d1297 100644 + # list of options we pass to downloader + _options = ( + 'url', 'filename', -+ 'timeout', 'close_connection', 'keepalive', ++ 'timeout', 'minrate', 'close_connection', 'keepalive', + 'throttle', 'bandwidth', 'range', 'reget', + 'user_agent', 'http_headers', 'ftp_headers', + 'proxy', 'prefix', 'username', 'password', @@ -2022,7 +2029,7 @@ index dad410b..7975f1b 100644 def urlopen(self, url, **kwargs): kw = dict(kwargs) diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py -index dd07c6a..077fd99 100644 +index dd07c6a..b456a0c 100644 --- a/urlgrabber/progress.py +++ b/urlgrabber/progress.py @@ -133,8 +133,8 @@ class BaseMeter: @@ -2048,7 +2055,7 @@ index dd07c6a..077fd99 100644 + return tl.add(' [%-*.*s]' % (blen, blen, bar)) + +def _term_add_end(tl, osize, size): -+ if osize is not None: ++ if osize: # osize should be None or >0, but that's been broken. + if size > osize: # Is ??? better? Really need something to say < vs >. + return tl.add(' !!! '), True + elif size != osize:
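
The status line that PATCH 2/3 extends is the single point of contact between
urlgrabber-ext-down and the parent process: on success the downloader writes
"_id size dlsz dltm OK", on failure "_id size dlsz dltm errno code strerror",
and the parent recovers the HTTP status via URLGrabError.code whenever the code
field is non-zero (the 503 test case above exercises exactly that). Below is a
minimal sketch of both ends of that wire format, assuming the field layout shown
in the patch; the helper names are hypothetical, and the real parser additionally
accepts two-field "_id size" progress updates that this sketch omits.

# Sketch of the urlgrabber-ext-down status line added in PATCH 2/3.
# Field layout (from the patch): "_id size dlsz dltm ug_err\n" where
# ug_err is either "OK" or "errno code strerror".  Helper names are
# hypothetical; only the format itself comes from the patch.

def format_status(_id, size, dlsz, dltm, exc=None):
    if exc is None:
        ug_err = 'OK'
    else:
        # getattr() keeps the format stable when no HTTP code was set
        ug_err = '%d %d %s' % (exc.errno, getattr(exc, 'code', 0), exc.strerror)
    return '%d %d %d %.3f %s\n' % (_id, size, dlsz, dltm, ug_err)

def parse_status(line):
    # maxsplit=6 keeps spaces inside strerror intact
    fields = line.rstrip('\n').split(' ', 6)
    _id, size = int(fields[0]), int(fields[1])
    dlsz, dltm = int(fields[2]), float(fields[3])
    if fields[4] == 'OK':
        return _id, size, dlsz, dltm, None
    err = IOError(int(fields[4]), fields[6])   # stands in for URLGrabError
    if fields[5] != '0':
        err.code = int(fields[5])              # e.g. 503 from the test case
    return _id, size, dlsz, dltm, err

Splitting with maxsplit=6 is what lets strerror contain spaces, which is why the
new code field had to be inserted before strerror rather than appended after it.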
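
BZ 919076, fixed in PATCH 1/3, covers servers that ignore a Range: request and
answer "200 OK" with the full body instead of "206 Partial Content": the
partially downloaded file must then be truncated and the reget bookkeeping
reset, otherwise the old bytes would sit in front of a second full copy. A
simplified, standalone sketch of that header-callback decision follows, using
the same attribute names as the patch; the class is a stand-in for illustration,
not the real PyCurlFileObject.

# Sketch of the BZ 919076 fix from PATCH 1/3: when a reget was requested
# but the server replies "200 OK" (full body) instead of "206 Partial
# Content", drop the partial data or it would be duplicated.  Attribute
# names mirror the patch; the class itself is a simplified stand-in.

class RegetState(object):
    def __init__(self, fo, reget_length):
        self.fo = fo                      # file opened for append by the reget
        self.append = True                # _build_range() requested a reget
        self.reget_time = None
        self._amount_read = reget_length  # progress already counts old bytes
        self._reget_length = reget_length
        self._hdr_dump = ''

    def on_header_line(self, buf):
        # Only the status line can match: _hdr_dump is empty just once.
        if self.append and self._hdr_dump == '' and ' 200 ' in buf:
            # Server ignored the Range header: undo what _build_range() did
            self.append = False
            self.reget_time = None
            self._amount_read = 0
            self._reget_length = 0
            self.fo.truncate(0)           # discard the partial download
        self._hdr_dump += buf

Checking self._hdr_dump == '' restricts the test to the first header line, so a
stray " 200 " inside some later header value cannot trigger the reset.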
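
The minrate option from PATCH 3/3 (BZ 964298) maps directly onto two libcurl
options: LOW_SPEED_LIMIT is set to "opts.minrate or 1000" bytes per second and
LOW_SPEED_TIME to the same value as the connect timeout, so curl aborts once the
transfer rate stays below the threshold for that many seconds. A minimal pycurl
sketch of that pairing, assuming placeholder URL and values; error handling is
reduced to the stall/timeout case.

# Sketch of the "minrate" option added in PATCH 3/3 (BZ 964298).
# curl aborts the transfer once the rate stays below LOW_SPEED_LIMIT
# (bytes/s) for LOW_SPEED_TIME seconds, mirroring what _set_opts() does:
#   setopt(LOW_SPEED_LIMIT, opts.minrate or 1000)
#   setopt(LOW_SPEED_TIME, timeout)
import pycurl

def fetch(url, out_path, minrate=None, timeout=300):
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.CONNECTTIMEOUT, timeout)
    c.setopt(pycurl.LOW_SPEED_LIMIT, minrate or 1000)  # patch's default floor
    c.setopt(pycurl.LOW_SPEED_TIME, timeout)
    f = open(out_path, 'wb')
    try:
        c.setopt(pycurl.WRITEDATA, f)
        c.perform()   # raises pycurl.error (E_OPERATION_TIMEDOUT)
    finally:          # if the server stalls below minrate for `timeout' s
        f.close()
        c.close()

Reusing the connect timeout for LOW_SPEED_TIME means one knob bounds both the
connect phase and a stalled transfer, which matches the timeout docstring the
patch adds to grabber.py.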
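
The proxy-auth workaround (BZ 769254, BZ 960163) went through two spellings in
this series: first "auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE" behind a
"pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0)" check (that expression
encodes curl 7.28.0), then the unconditional
"pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE" in PATCH 3/3. The
subtraction is equivalent to masking the bit out only because the GSSNEGOTIATE
bit is guaranteed set in HTTPAUTH_ANY; a short sanity check, assuming a pycurl
build that exposes both constants:

# Why "HTTPAUTH_ANY - HTTPAUTH_GSSNEGOTIATE" equals masking the bit out:
# ANY is a bitmask with the GSSNEGOTIATE bit set, so plain subtraction
# clears exactly that bit.  Were the bit not set, subtraction would
# corrupt the mask, which is why "& ~" is the more defensive spelling
# used in the intermediate version of this patch.
import pycurl

any_auth = pycurl.HTTPAUTH_ANY
gss = pycurl.HTTPAUTH_GSSNEGOTIATE
assert any_auth & gss                       # the bit is present in ANY
assert any_auth - gss == any_auth & ~gss    # so both spellings agree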