python-urlgrabber/urlgrabber-grab-no-range.patch

diff -ru urlgrabber-3.0.0-orig/urlgrabber/grabber.py urlgrabber-3.0.0/urlgrabber/grabber.py
--- urlgrabber-3.0.0-orig/urlgrabber/grabber.py	2008-05-02 17:18:14.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/grabber.py	2008-05-02 17:30:35.000000000 -0400
@@ -400,6 +400,11 @@
     HTTPException = None
 
 try:
+    from httplib import HTTPResponse
+except ImportError, msg:
+    HTTPResponse = None
+
+try:
     # This is a convenient way to make keepalive optional.
     # Just rename the module so it can't be imported.
     import keepalive
@@ -1065,8 +1070,21 @@
         req = urllib2.Request(self.url, self.opts.data) # build request object
         self._add_headers(req) # add misc headers that we need
         self._build_range(req) # take care of reget and byterange stuff
+        
+        def _checkNoReget(fo):
+            # Return True when a reget Range request was sent but the HTTP
+            # reply status is not 206, i.e. the range was not honoured.
+            if self.opts.reget is None:
+                return False
+            if 'Range' not in req.headers:
+                return False
+            # HTTPResponse is None if the httplib import failed; guard isinstance().
+            if HTTPResponse is None or not isinstance(fo, HTTPResponse):
+                return False
+            return fo.code != 206
 
         fo, hdr = self._make_request(req, opener)
+        fetch_again = 0
         if self.reget_time and self.opts.reget == 'check_timestamp':
             # do this if we have a local file with known timestamp AND
             # we're in check_timestamp reget mode.
@@ -1078,14 +1096,26 @@
             except (TypeError,):
                 fetch_again = 1
             
+        if _checkNoReget(fo): # doing a "reget" didn't work, so fixup
+            fetch_again = 0
+            self.opts.reget = None
+            self.append = 0
+            self._amount_read = 0
+
+        if True:
             if fetch_again:
                 # the server version is newer than the (incomplete) local
                 # version, so we should abandon the version we're getting
                 # and fetch the whole thing again.
                 fo.close()
+                #  Without this we'll have to read all of the previous request
+                # data. For "large" requests, this is very bad.
+                fo.close_connection()
                 self.opts.reget = None
                 del req.headers['Range']
                 self._build_range(req)
+                # This doesn't get reset in _build_range() *sigh*
+                self._amount_read = 0
                 fo, hdr = self._make_request(req, opener)
 
         (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)
- Fix regets against servers that don't allow Range requests, also tweaks reget == check_timestamp, if anyone/thing uses that. - Resolves: bug#435156 - Fix minor typo in progress for single instance. 17 years ago			`diff -ru urlgrabber-3.0.0-orig/urlgrabber/grabber.py urlgrabber-3.0.0/urlgrabber/grabber.py`
			`--- urlgrabber-3.0.0-orig/urlgrabber/grabber.py 2008-05-02 17:18:14.000000000 -0400`
			`+++ urlgrabber-3.0.0/urlgrabber/grabber.py 2008-05-02 17:30:35.000000000 -0400`
			`@@ -400,6 +400,11 @@`
			`HTTPException = None`

			`try:`
			`+ from httplib import HTTPResponse`
			`+except ImportError, msg:`
			`+ HTTPResponse = None`
			`+`
			`+try:`
			`# This is a convenient way to make keepalive optional.`
			`# Just rename the module so it can't be imported.`
			`import keepalive`
			`@@ -1065,8 +1070,21 @@`
			`req = urllib2.Request(self.url, self.opts.data) # build request object`
			`self._add_headers(req) # add misc headers that we need`
			`self._build_range(req) # take care of reget and byterange stuff`
			`+`
			`+ def _checkNoReget(fo):`
			`+ # HTTP can disallow Range requests`
			`+`
			`+ if self.opts.reget is None:`
			`+ return False`
			`+ if 'Range' not in req.headers:`
			`+ return False`
			`+ if not isinstance(fo, HTTPResponse):`
			`+ return False`
			`+`
			`+ return fo.code != 206`

			`fo, hdr = self._make_request(req, opener)`
			`+ fetch_again = 0`
			`if self.reget_time and self.opts.reget == 'check_timestamp':`
			`# do this if we have a local file with known timestamp AND`
			`# we're in check_timestamp reget mode.`
			`@@ -1078,14 +1096,26 @@`
			`except (TypeError,):`
			`fetch_again = 1`

			`+ if _checkNoReget(fo): # doing a "reget" didn't work, so fixup`
			`+ fetch_again = 0`
			`+ self.opts.reget = None`
			`+ self.append = 0`
			`+ self._amount_read = 0`
			`+`
			`+ if True:`
			`if fetch_again:`
			`# the server version is newer than the (incomplete) local`
			`# version, so we should abandon the version we're getting`
			`# and fetch the whole thing again.`
			`fo.close()`
			`+ # Without this we'll have to read all of the previous request`
			`+ # data. For "large" requests, this is very bad.`
			`+ fo.close_connection()`
			`self.opts.reget = None`
			`del req.headers['Range']`
			`self._build_range(req)`
			`+ # This doesn't get reset in _build_range() sigh`
			`+ self._amount_read = 0`
			`fo, hdr = self._make_request(req, opener)`

			`(scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)`