You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
python-urlgrabber/file-url-profiling.patch

66 lines
2.4 KiB

commit 7c74f526dd761b647d6bb6a7b7d6c285fe78bdb8
Author: Zdeněk Pavlas <zpavlas@redhat.com>
Date: Fri May 18 15:38:44 2012 +0200
timedhosts: fix file:// profiling. BZ 822632.
- Do not profile absolute file:// URLs.
- Give a hint to _TH.update() which baseurl was used
so we may profile file:// mirrors, too.
- Strip username and password from stored hostnames.
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 094be77..be85f92 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -2060,7 +2060,7 @@ class _ExternalDownloader:
else:
ug_err = URLGrabError(int(line[4]), line[5])
if DEBUG: DEBUG.info('failure: %s', err)
- _TH.update(opts.url, int(line[2]), float(line[3]), ug_err)
+ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
ret.append((opts, size, ug_err))
return ret
@@ -2268,7 +2268,7 @@ class _TH:
if filename and _TH.dirty is None:
try:
for line in open(filename):
- host, speed, fail, ts = line.split()
+ host, speed, fail, ts = line.split(' ', 3)
_TH.hosts[host] = int(speed), int(fail), int(ts)
except IOError: pass
_TH.dirty = False
@@ -2288,9 +2288,14 @@ class _TH:
_TH.dirty = False
@staticmethod
- def update(url, dl_size, dl_time, ug_err):
+ def update(url, dl_size, dl_time, ug_err, baseurl=None):
_TH.load()
- host = urlparse.urlsplit(url).netloc
+
+ # Use hostname from URL. If it's a file:// URL, use baseurl.
+ # If no baseurl, do not update timedhosts.
+ host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
+ if not host: return
+
speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
now = time.time()
@@ -2311,9 +2316,12 @@ class _TH:
_TH.dirty = True
@staticmethod
- def estimate(url):
+ def estimate(baseurl):
_TH.load()
- host = urlparse.urlsplit(url).netloc
+
+ # Use just the hostname, unless it's a file:// baseurl.
+ host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
+
default_speed = default_grabber.opts.default_speed
try: speed, fail, ts = _TH.hosts[host]
except KeyError: return default_speed