Cope with HTTP servers that do not honour Range requests during a "reget".

If a Range header was sent but the response code is not 206, the reget
state is reset (reget option, append flag and byte counter) and the
response already received is used as-is.  When a re-fetch is forced, the
old connection is dropped where the response object supports it, and the
byte counter is reset before the second request.

diff -ru urlgrabber-3.0.0-orig/urlgrabber/grabber.py urlgrabber-3.0.0/urlgrabber/grabber.py
--- urlgrabber-3.0.0-orig/urlgrabber/grabber.py	2008-05-02 17:18:14.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/grabber.py	2008-05-02 17:30:35.000000000 -0400
@@ -400,6 +400,11 @@
     HTTPException = None
 
 try:
+    from httplib import HTTPResponse
+except ImportError, msg:
+    HTTPResponse = None
+
+try:
     # This is a convenient way to make keepalive optional.
     # Just rename the module so it can't be imported.
     import keepalive
@@ -1065,8 +1070,25 @@
         req = urllib2.Request(self.url, self.opts.data) # build request object
         self._add_headers(req) # add misc headers that we need
         self._build_range(req) # take care of reget and byterange stuff
+
+        def _checkNoReget(fo):
+            # Return True when a Range request was sent but the server
+            # answered with something other than 206 (Partial Content),
+            # i.e. HTTP can disallow Range requests.
+
+            if self.opts.reget is None:
+                return False
+            if 'Range' not in req.headers:
+                return False
+            # HTTPResponse is None when the httplib import above failed;
+            # isinstance() would raise TypeError on a None class argument.
+            if HTTPResponse is None or not isinstance(fo, HTTPResponse):
+                return False
+
+            return fo.code != 206
 
         fo, hdr = self._make_request(req, opener)
+        fetch_again = 0
         if self.reget_time and self.opts.reget == 'check_timestamp':
             # do this if we have a local file with known timestamp AND
             # we're in check_timestamp reget mode.
@@ -1078,14 +1100,31 @@
             except (TypeError,):
                 fetch_again = 1
 
+        if _checkNoReget(fo): # doing a "reget" didn't work, so fixup
+            fetch_again = 0
+            self.opts.reget = None
+            self.append = 0
+            self._amount_read = 0
+
+        # NOTE: "if True:" presumably just keeps the existing block below at
+        # its old indentation, so the patch does not have to re-indent it.
+        if True:
             if fetch_again:
                 # the server version is newer than the (incomplete) local
                 # version, so we should abandon the version we're getting
                 # and fetch the whole thing again.
                 fo.close()
+                # Without this we'll have to read all of the previous request
+                # data. For "large" requests, this is very bad.
+                # fo is not guaranteed to be an HTTPResponse (see the check in
+                # _checkNoReget), so only call close_connection() if present.
+                if hasattr(fo, 'close_connection'):
+                    fo.close_connection()
                 self.opts.reget = None
                 del req.headers['Range']
                 self._build_range(req)
+                # This doesn't get reset in _build_range() *sigh*
+                self._amount_read = 0
                 fo, hdr = self._make_request(req, opener)
 
         (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)