You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
98 lines
3.6 KiB
98 lines
3.6 KiB
commit 7c74f526dd761b647d6bb6a7b7d6c285fe78bdb8
|
|
Author: Zdeněk Pavlas <zpavlas@redhat.com>
|
|
Date: Fri May 18 15:38:44 2012 +0200
|
|
|
|
timedhosts: fix file:// profiling. BZ 822632.
|
|
|
|
- Do not profile absolute file:// URLs.
|
|
- Give a hint to _TH.update() which baseurl was used
|
|
so we may profile file:// mirrors, too.
|
|
- Strip username and password from stored hostnames.
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
index 094be77..be85f92 100644
|
|
--- a/urlgrabber/grabber.py
|
|
+++ b/urlgrabber/grabber.py
|
|
@@ -2060,7 +2060,7 @@ class _ExternalDownloader:
|
|
else:
|
|
ug_err = URLGrabError(int(line[4]), line[5])
|
|
if DEBUG: DEBUG.info('failure: %s', err)
|
|
- _TH.update(opts.url, int(line[2]), float(line[3]), ug_err)
|
|
+ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
|
|
ret.append((opts, size, ug_err))
|
|
return ret
|
|
|
|
@@ -2268,7 +2268,7 @@ class _TH:
|
|
if filename and _TH.dirty is None:
|
|
try:
|
|
for line in open(filename):
|
|
- host, speed, fail, ts = line.split()
|
|
+ host, speed, fail, ts = line.split(' ', 3)
|
|
_TH.hosts[host] = int(speed), int(fail), int(ts)
|
|
except IOError: pass
|
|
_TH.dirty = False
|
|
@@ -2288,9 +2288,14 @@ class _TH:
|
|
_TH.dirty = False
|
|
|
|
@staticmethod
|
|
- def update(url, dl_size, dl_time, ug_err):
|
|
+ def update(url, dl_size, dl_time, ug_err, baseurl=None):
|
|
_TH.load()
|
|
- host = urlparse.urlsplit(url).netloc
|
|
+
|
|
+ # Use hostname from URL. If it's a file:// URL, use baseurl.
|
|
+ # If no baseurl, do not update timedhosts.
|
|
+ host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
|
|
+ if not host: return
|
|
+
|
|
speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
|
|
now = time.time()
|
|
|
|
@@ -2311,9 +2316,12 @@ class _TH:
|
|
_TH.dirty = True
|
|
|
|
@staticmethod
|
|
- def estimate(url):
|
|
+ def estimate(baseurl):
|
|
_TH.load()
|
|
- host = urlparse.urlsplit(url).netloc
|
|
+
|
|
+ # Use just the hostname, unless it's a file:// baseurl.
|
|
+ host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
|
|
+
|
|
default_speed = default_grabber.opts.default_speed
|
|
try: speed, fail, ts = _TH.hosts[host]
|
|
except KeyError: return default_speed
|
|
commit fa6a17c29e9dea3ccd2d384039b305f027a5b75e
|
|
Author: Zdeněk Pavlas <zpavlas@redhat.com>
|
|
Date: Mon May 21 09:06:13 2012 +0200
|
|
|
|
timedhosts: sanity check on dl_time
|
|
|
|
- handle the dl_time <= 0 case
|
|
|
|
- relative validity of calculated speed now depends
|
|
on dl_time instead of dl_size (that's where the
|
|
random error is)
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
index be85f92..73e14aa 100644
|
|
--- a/urlgrabber/grabber.py
|
|
+++ b/urlgrabber/grabber.py
|
|
@@ -2301,11 +2301,12 @@ class _TH:
|
|
|
|
if ug_err is None:
|
|
# k1: the older, the less useful
|
|
- # k2: if it was <1MiB, don't trust it much
|
|
+ # k2: <500ms readings are less reliable
|
|
# speeds vary, use 10:1 smoothing
|
|
k1 = 2**((ts - now) / default_grabber.opts.half_life)
|
|
- k2 = min(dl_size / 1e6, 1.0) / 10
|
|
- speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2)
|
|
+ k2 = min(dl_time / .500, 1.0) / 10
|
|
+ if k2 > 0:
|
|
+ speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2)
|
|
fail = 0
|
|
elif getattr(ug_err, 'code', None) == 404:
|
|
fail = 0 # alive, at least
|