From 7dae824261bd658c2429371dabee5c7c04f09ab9 Mon Sep 17 00:00:00 2001 From: James Antill Date: Fri, 2 May 2008 21:36:54 +0000 Subject: [PATCH] - Fix reget's against servers that don't allow Range requests, also tweaks - reget == check_timestamp, if anyone/thing uses that. - Resolves: bug#435156 - Fix minor typo in progress for single instance. --- python-urlgrabber.spec | 10 +++++- urlgrabber-grab-no-range.patch | 64 ++++++++++++++++++++++++++++++++++ urlgrabber-progress-ui.patch | 2 +- 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 urlgrabber-grab-no-range.patch diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 2fe2c9c..c3b281f 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,13 +3,14 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.0.0 -Release: 6%{?dist} +Release: 7%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch0: urlgrabber-keepalive.patch Patch1: urlgrabber-string-type.patch Patch2: urlgrabber-3.0.0-cleanup.patch Patch3: urlgrabber-ftp-port.patch Patch4: urlgrabber-progress-ui.patch +Patch5: urlgrabber-grab-no-range.patch License: LGPLv2+ Group: Development/Libraries BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root @@ -30,6 +31,7 @@ authentication, proxies and more. %patch2 -p1 %patch3 -p0 %patch4 -p0 +%patch5 -p1 %build python setup.py build @@ -49,6 +51,12 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/urlgrabber %changelog +* Fri May 2 2008 James Antill 3.0.0-7 +- Fix reget's against servers that don't allow Range requests, also tweaks +- reget == check_timestamp, if anyone/thing uses that. +- Resolves: bug#435156 +- Fix minor typo in progress for single instance. + * Mon Apr 7 2008 James Antill 3.0.0-6 - Fix the ftp byterange port problem: - Resolves: bug#419241 diff --git a/urlgrabber-grab-no-range.patch b/urlgrabber-grab-no-range.patch new file mode 100644 index 0000000..32a3d05 --- /dev/null +++ b/urlgrabber-grab-no-range.patch @@ -0,0 +1,64 @@ +diff -ru urlgrabber-3.0.0-orig/urlgrabber/grabber.py urlgrabber-3.0.0/urlgrabber/grabber.py +--- urlgrabber-3.0.0-orig/urlgrabber/grabber.py 2008-05-02 17:18:14.000000000 -0400 ++++ urlgrabber-3.0.0/urlgrabber/grabber.py 2008-05-02 17:30:35.000000000 -0400 +@@ -400,6 +400,11 @@ + HTTPException = None + + try: ++ from httplib import HTTPResponse ++except ImportError, msg: ++ HTTPResponse = None ++ ++try: + # This is a convenient way to make keepalive optional. + # Just rename the module so it can't be imported. + import keepalive +@@ -1065,8 +1070,21 @@ + req = urllib2.Request(self.url, self.opts.data) # build request object + self._add_headers(req) # add misc headers that we need + self._build_range(req) # take care of reget and byterange stuff ++ ++ def _checkNoReget(fo): ++ # HTTP can disallow Range requests ++ ++ if self.opts.reget is None: ++ return False ++ if 'Range' not in req.headers: ++ return False ++ if not isinstance(fo, HTTPResponse): ++ return False ++ ++ return fo.code != 206 + + fo, hdr = self._make_request(req, opener) ++ fetch_again = 0 + if self.reget_time and self.opts.reget == 'check_timestamp': + # do this if we have a local file with known timestamp AND + # we're in check_timestamp reget mode. +@@ -1078,14 +1096,26 @@ + except (TypeError,): + fetch_again = 1 + ++ if _checkNoReget(fo): # doing a "reget" didn't work, so fixup ++ fetch_again = 0 ++ self.opts.reget = None ++ self.append = 0 ++ self._amount_read = 0 ++ ++ if True: + if fetch_again: + # the server version is newer than the (incomplete) local + # version, so we should abandon the version we're getting + # and fetch the whole thing again. + fo.close() ++ # Without this we'll have to read all of the previous request ++ # data. For "large" requests, this is very bad. ++ fo.close_connection() + self.opts.reget = None + del req.headers['Range'] + self._build_range(req) ++ # This doesn't get reset in _build_range() *sigh* ++ self._amount_read = 0 + fo, hdr = self._make_request(req, opener) + + (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url) diff --git a/urlgrabber-progress-ui.patch b/urlgrabber-progress-ui.patch index ce911f1..5fe3cda 100644 --- a/urlgrabber-progress-ui.patch +++ b/urlgrabber-progress-ui.patch @@ -47,7 +47,7 @@ diff -u -r1.7 urlgrabber/progress.py + bar = '='*int(10 * frac) + ave_dl = format_number(self.re.average_rate()) + if sofar_size is None: -+ out = '\r%-25.25s %3i%% |%-14.14s| %5sB/s | %5sB %9s ETA ' % \ ++ out = '\r%-29.29s %3i%% |%-10.10s| %5sB/s | %5sB %9s ETA ' % \ + (text, frac*100, bar, ave_dl, fread, frtime) + else: + fmt ='\r%-22.22s %3i%% |%4i%% |%-10.10s| %5sB/s | %5sB %9s ETA '