Update to latest HEAD

12 years ago · de85533e85
parent c508ad399b
commit de85533e85
2 changed files with 45 additions and 26 deletions
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 25%{?dist}
+Release: 26%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch

@@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down

 %changelog
+* Wed Mar 27 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-26
+- Update to latest HEAD.
+- Handle HTTP 200 response to range requests correctly.  BZ 919076
+- Reset curl_obj to clear CURLOPT_RANGE from previous requests.  BZ 923951
+
 * Thu Mar  7 2013 Zdeněk Pavlas <zpavlas@redhat.com> - 3.9.1-25
 - Update to latest HEAD.
 - fix some test cases that were failing.  BZ 918658
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -275,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
     return (fb,lb)
 
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..1afb2c5 100644
+index e090e90..9526dc1 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -874,7 +874,7 @@ index e090e90..1afb2c5 100644
             if not self._prog_running:
                 if self.opts.progress_obj:
                     size  = self.size + self._reget_length
-@@ -1079,15 +1267,24 @@ class PyCurlFileObject():
+@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
                     self.opts.progress_obj.update(self._amount_read)
 
             self._amount_read += len(buf)
@@ -899,9 +899,29 @@ index e090e90..1afb2c5 100644
 -            return -1            
 +            return -1
         try:
-             self._hdr_dump += buf
+-            self._hdr_dump += buf
             # we have to get the size before we do the progress obj start
-@@ -1104,7 +1301,17 @@ class PyCurlFileObject():
+             # but we can't do that w/o making it do 2 connects, which sucks
+             # so we cheat and stuff it in here in the hdr_retrieve
+-            if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1:
+-                length = buf.split(':')[1]
+-                self.size = int(length)
+            if self.scheme in ['http','https']:
+                if buf.lower().find('content-length') != -1:
+                    length = buf.split(':')[1]
+                    self.size = int(length)
+                elif self.append and self._hdr_dump == '' and ' 200 ' in buf:
+                    # reget was attempted but server sends it all
+                    # undo what we did in _build_range()
+                    self.append = False
+                    self.reget_time = None
+                    self._amount_read = 0
+                    self._reget_length = 0
+                    self.fo.truncate(0)
+             elif self.scheme in ['ftp']:
+                 s = None
+                 if buf.startswith('213 '):
+@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
                     s = parse150(buf)
                 if s:
                     self.size = int(s)
@@ -913,6 +933,7 @@ index e090e90..1afb2c5 100644
 +                self.scheme = urlparse.urlsplit(location)[0]
 +                self.url = location
 +                
+            self._hdr_dump += buf
 +            if len(self._hdr_dump) != 0 and buf == '\r\n':
 +                self._hdr_ended = True
 +                if DEBUG: DEBUG.debug('header ended:')
@@ -920,7 +941,7 @@ index e090e90..1afb2c5 100644
             return len(buf)
         except KeyboardInterrupt:
             return pycurl.READFUNC_ABORT
-@@ -1113,8 +1320,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
         if self._parsed_hdr:
             return self._parsed_hdr
         statusend = self._hdr_dump.find('\n')
@@ -931,7 +952,7 @@ index e090e90..1afb2c5 100644
         self._parsed_hdr =  mimetools.Message(hdrfp)
         return self._parsed_hdr
     
-@@ -1127,6 +1336,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
         if not opts:
             opts = self.opts
 
@@ -941,7 +962,7 @@ index e090e90..1afb2c5 100644
 
         # defaults we're always going to set
         self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1348,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
         self.curl_obj.setopt(pycurl.FAILONERROR, True)
         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -964,7 +985,7 @@ index e090e90..1afb2c5 100644
         
         # maybe to be options later
         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1370,11 @@ class PyCurlFileObject():
+@@ -1148,9 +1379,11 @@ class PyCurlFileObject():
         
         # timeouts
         timeout = 300
@@ -979,7 +1000,7 @@ index e090e90..1afb2c5 100644
 
         # ssl options
         if self.scheme == 'https':
-@@ -1158,13 +1382,16 @@ class PyCurlFileObject():
+@@ -1158,13 +1391,16 @@ class PyCurlFileObject():
                 self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
                 self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
             self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@@ -997,7 +1018,7 @@ index e090e90..1afb2c5 100644
             if opts.ssl_cert_type:                
                 self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
             if opts.ssl_key_pass:
-@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
+@@ -1187,28 +1423,28 @@ class PyCurlFileObject():
         if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
             self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
             
@@ -1044,7 +1065,7 @@ index e090e90..1afb2c5 100644
             
         # our url
         self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
+@@ -1228,39 +1464,26 @@ class PyCurlFileObject():
             
             code = self.http_code
             errcode = e.args[0]
@@ -1090,7 +1111,7 @@ index e090e90..1afb2c5 100644
                 # this is probably wrong but ultimately this is what happens
                 # we have a legit http code and a pycurl 'writer failed' code
                 # which almost always means something aborted it from outside
-@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
+@@ -1269,36 +1492,70 @@ class PyCurlFileObject():
                 # figure out what aborted the pycurl process FIXME
                 raise KeyboardInterrupt
                 
@@ -1186,14 +1207,7 @@ index e090e90..1afb2c5 100644
 
     def _do_open(self):
         self.curl_obj = _curl_cache
-        self.curl_obj.reset() # reset all old settings away, just in case
-+        # reset() clears PYCURL_ERRORBUFFER, and there's no way
-+        # to reinitialize it, so better don't do that.  BZ 896025
-+        #self.curl_obj.reset() # reset all old settings away, just in case
-         # setup any ranges
-         self._set_opts()
-         self._do_grab()
-@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
+@@ -1333,7 +1590,11 @@ class PyCurlFileObject():
                 
         if self.opts.range:
             rt = self.opts.range
@@ -1206,7 +1220,7 @@ index e090e90..1afb2c5 100644
 
         if rt:
             header = range_tuple_to_header(rt)
-@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1695,46 @@ class PyCurlFileObject():
             #fh, self._temp_name = mkstemp()
             #self.fo = open(self._temp_name, 'wb')
 
@@ -1260,7 +1274,7 @@ index e090e90..1afb2c5 100644
         else:
             #self.fo = open(self._temp_name, 'r')
             self.fo.seek(0)
-@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1812,20 @@ class PyCurlFileObject():
             if self._prog_running:
                 downloaded += self._reget_length
                 self.opts.progress_obj.update(downloaded)
@@ -1286,7 +1300,7 @@ index e090e90..1afb2c5 100644
 
             msg = _("Downloaded more than max size for %s: %s > %s") \
                         % (self.url, cur, max_size)
-@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1833,6 @@ class PyCurlFileObject():
             return True
         return False
         
@@ -1300,7 +1314,7 @@ index e090e90..1afb2c5 100644
     def read(self, amt=None):
         self._fill_buffer(amt)
         if amt is None:
-@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1864,21 @@ class PyCurlFileObject():
             self.opts.progress_obj.end(self._amount_read)
         self.fo.close()
         
@@ -1323,7 +1337,7 @@ index e090e90..1afb2c5 100644
 
 #####################################################################
 # DEPRECATED FUNCTIONS
-@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
 
         
 #####################################################################