commit 7c74f526dd761b647d6bb6a7b7d6c285fe78bdb8
Author: Zdeněk Pavlas
Date:   Fri May 18 15:38:44 2012 +0200

    timedhosts: fix file:// profiling. BZ 822632.

    - Do not profile absolute file:// URLs.
    - Give a hint to _TH.update() which baseurl was used
      so we may profile file:// mirrors, too.
    - Strip username and password from stored hostnames.

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 094be77..be85f92 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -2060,7 +2060,7 @@ class _ExternalDownloader:
             else:
                 ug_err = URLGrabError(int(line[4]), line[5])
                 if DEBUG: DEBUG.info('failure: %s', err)
-            _TH.update(opts.url, int(line[2]), float(line[3]), ug_err)
+            _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
             ret.append((opts, size, ug_err))
         return ret
 
@@ -2268,7 +2268,7 @@ class _TH:
         if filename and _TH.dirty is None:
             try:
                 for line in open(filename):
-                    host, speed, fail, ts = line.split()
+                    host, speed, fail, ts = line.split(' ', 3)
                     _TH.hosts[host] = int(speed), int(fail), int(ts)
             except IOError: pass
             _TH.dirty = False
@@ -2288,9 +2288,14 @@ class _TH:
             _TH.dirty = False
 
     @staticmethod
-    def update(url, dl_size, dl_time, ug_err):
+    def update(url, dl_size, dl_time, ug_err, baseurl=None):
         _TH.load()
-        host = urlparse.urlsplit(url).netloc
+
+        # Use hostname from URL. If it's a file:// URL, use baseurl.
+        # If no baseurl, do not update timedhosts.
+        host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
+        if not host: return
+
         speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
         now = time.time()
 
@@ -2311,9 +2316,12 @@ class _TH:
         _TH.dirty = True
 
     @staticmethod
-    def estimate(url):
+    def estimate(baseurl):
         _TH.load()
-        host = urlparse.urlsplit(url).netloc
+
+        # Use just the hostname, unless it's a file:// baseurl.
+        host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
+
         default_speed = default_grabber.opts.default_speed
         try: speed, fail, ts = _TH.hosts[host]
         except KeyError: return default_speed