commit 7c74f526dd761b647d6bb6a7b7d6c285fe78bdb8
Author: Zdeněk Pavlas <zpavlas@redhat.com>
Date:   Fri May 18 15:38:44 2012 +0200

    timedhosts: fix file:// profiling.  BZ 822632.
    
    - Do not profile absolute file:// URLs.
    - Pass the baseurl that was used to _TH.update() as a hint,
      so that file:// mirrors can be profiled, too.
    - Strip username and password from stored hostnames.

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 094be77..be85f92 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -2060,7 +2060,7 @@ class _ExternalDownloader:
             else:
                 ug_err = URLGrabError(int(line[4]), line[5])
                 if DEBUG: DEBUG.info('failure: %s', err)
-            _TH.update(opts.url, int(line[2]), float(line[3]), ug_err)
+            _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
             ret.append((opts, size, ug_err))
         return ret
 
@@ -2268,7 +2268,7 @@ class _TH:
         if filename and _TH.dirty is None:
             try:
                 for line in open(filename):
-                    host, speed, fail, ts = line.split()
+                    host, speed, fail, ts = line.split(' ', 3)
                     _TH.hosts[host] = int(speed), int(fail), int(ts)
             except IOError: pass
             _TH.dirty = False
@@ -2288,9 +2288,14 @@ class _TH:
             _TH.dirty = False
 
     @staticmethod
-    def update(url, dl_size, dl_time, ug_err):
+    def update(url, dl_size, dl_time, ug_err, baseurl=None):
         _TH.load()
-        host = urlparse.urlsplit(url).netloc
+
+        # Use hostname from URL.  If it's a file:// URL, use baseurl.
+        # If no baseurl, do not update timedhosts.
+        host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
+        if not host: return
+
         speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
         now = time.time()
 
@@ -2311,9 +2316,12 @@ class _TH:
         _TH.dirty = True
 
     @staticmethod
-    def estimate(url):
+    def estimate(baseurl):
         _TH.load()
-        host = urlparse.urlsplit(url).netloc
+
+        # Use just the hostname, unless it's a file:// baseurl.
+        host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
+
         default_speed = default_grabber.opts.default_speed
         try: speed, fail, ts = _TH.hosts[host]
         except KeyError: return default_speed
commit fa6a17c29e9dea3ccd2d384039b305f027a5b75e
Author: Zdeněk Pavlas <zpavlas@redhat.com>
Date:   Mon May 21 09:06:13 2012 +0200

    timedhosts: sanity check on dl_time
    
    - handle the dl_time <= 0 case
    
    - the relative weight given to a calculated speed now
      depends on dl_time instead of dl_size, since dl_time
      is where the random measurement error is.

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index be85f92..73e14aa 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -2301,11 +2301,12 @@ class _TH:
 
         if ug_err is None:
             # k1: the older, the less useful
-            # k2: if it was <1MiB, don't trust it much
+            # k2: <500ms readings are less reliable
             # speeds vary, use 10:1 smoothing
             k1 = 2**((ts - now) / default_grabber.opts.half_life)
-            k2 = min(dl_size / 1e6, 1.0) / 10
-            speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2)
+            k2 = min(dl_time / .500, 1.0) / 10
+            if k2 > 0:
+                speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2)
             fail = 0
         elif getattr(ug_err, 'code', None) == 404:
             fail = 0 # alive, at least