From 88126d0a238225cda023a8a3835ff773fc0cd152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zden=C4=9Bk=20Pavlas?= Date: Fri, 18 May 2012 15:55:18 +0200 Subject: [PATCH] fix file:// profiling. BZ 822632. --- file-url-profiling.patch | 65 ++++++++++++++++++++++++++++++++++++++++ python-urlgrabber.spec | 7 ++++- 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 file-url-profiling.patch diff --git a/file-url-profiling.patch b/file-url-profiling.patch new file mode 100644 index 0000000..2fd1f3c --- /dev/null +++ b/file-url-profiling.patch @@ -0,0 +1,65 @@ +commit 7c74f526dd761b647d6bb6a7b7d6c285fe78bdb8 +Author: Zdeněk Pavlas +Date: Fri May 18 15:38:44 2012 +0200 + + timedhosts: fix file:// profiling. BZ 822632. + + - Do not profile absolute file:// URLs. + - Give a hint to _TH.update() which baseurl was used + so we may profile file:// mirrors, too. + - Strip username and password from stored hostnames. + +diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py +index 094be77..be85f92 100644 +--- a/urlgrabber/grabber.py ++++ b/urlgrabber/grabber.py +@@ -2060,7 +2060,7 @@ class _ExternalDownloader: + else: + ug_err = URLGrabError(int(line[4]), line[5]) + if DEBUG: DEBUG.info('failure: %s', err) +- _TH.update(opts.url, int(line[2]), float(line[3]), ug_err) ++ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0]) + ret.append((opts, size, ug_err)) + return ret + +@@ -2268,7 +2268,7 @@ class _TH: + if filename and _TH.dirty is None: + try: + for line in open(filename): +- host, speed, fail, ts = line.split() ++ host, speed, fail, ts = line.split(' ', 3) + _TH.hosts[host] = int(speed), int(fail), int(ts) + except IOError: pass + _TH.dirty = False +@@ -2288,9 +2288,14 @@ class _TH: + _TH.dirty = False + + @staticmethod +- def update(url, dl_size, dl_time, ug_err): ++ def update(url, dl_size, dl_time, ug_err, baseurl=None): + _TH.load() +- host = urlparse.urlsplit(url).netloc ++ ++ # Use hostname from URL. If it's a file:// URL, use baseurl. ++ # If no baseurl, do not update timedhosts. ++ host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl ++ if not host: return ++ + speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0) + now = time.time() + +@@ -2311,9 +2316,12 @@ class _TH: + _TH.dirty = True + + @staticmethod +- def estimate(url): ++ def estimate(baseurl): + _TH.load() +- host = urlparse.urlsplit(url).netloc ++ ++ # Use just the hostname, unless it's a file:// baseurl. ++ host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl ++ + default_speed = default_grabber.opts.default_speed + try: speed, fail, ts = _TH.hosts[host] + except KeyError: return default_speed diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 31094ae..fc9909e 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,10 +3,11 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.1 -Release: 12%{?dist} +Release: 13%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch Patch2: multi-downloader.patch +Patch3: file-url-profiling.patch License: LGPLv2+ Group: Development/Libraries @@ -26,6 +27,7 @@ authentication, proxies and more. %setup -q -n urlgrabber-%{version} %patch1 -p1 %patch2 -p1 +%patch3 -p1 %build python setup.py build @@ -46,6 +48,9 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) /usr/libexec/urlgrabber-ext-down %changelog +* Fri May 18 2012 Zdeněk Pavlas - 3.9.1-13 +- fix file:// profiling. BZ 822632. + * Mon May 14 2012 Zdeněk Pavlas - 3.9.1-12 - Update to latest HEAD - Merge multi-downloader patches