|
|
|
@ -402,7 +402,7 @@ index 3e5f3b7..5efa160 100644
|
|
|
|
|
return (fb,lb)
|
|
|
|
|
|
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
|
|
|
index e090e90..30a8bdb 100644
|
|
|
|
|
index e090e90..63baef7 100644
|
|
|
|
|
--- a/urlgrabber/grabber.py
|
|
|
|
|
+++ b/urlgrabber/grabber.py
|
|
|
|
|
@@ -35,7 +35,7 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
@ -1525,7 +1525,7 @@ index e090e90..30a8bdb 100644
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
|
# DEPRECATED FUNCTIONS
|
|
|
|
|
@@ -1621,6 +1939,490 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
@@ -1621,6 +1939,492 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
@ -1806,9 +1806,11 @@ index e090e90..30a8bdb 100644
|
|
|
|
|
+
|
|
|
|
|
+ if ug_err is None:
|
|
|
|
|
+ continue
|
|
|
|
|
+ if ug_err.errno == pycurl.E_OPERATION_TIMEOUTED:
|
|
|
|
|
+ if limit > 1 and ug_err.errno in (pycurl.E_OPERATION_TIMEOUTED,
|
|
|
|
|
+ pycurl.E_COULDNT_CONNECT):
|
|
|
|
|
+ # One possible cause is connection-limited server.
|
|
|
|
|
+ # Turn on the max_connections=1 override. BZ 853432
|
|
|
|
|
+ DEBUG.info('max_connections(%s) %d => 1', key, limit)
|
|
|
|
|
+ single.add(key)
|
|
|
|
|
+
|
|
|
|
|
+ retry = opts.retry or 0
|
|
|
|
@ -1899,7 +1901,7 @@ index e090e90..30a8bdb 100644
|
|
|
|
|
+
|
|
|
|
|
+ # update the current mirror and limit
|
|
|
|
|
+ key = best['mirror']
|
|
|
|
|
+ limit = best.get('kwargs', {}).get('max_connections') or 2
|
|
|
|
|
+ limit = best.get('kwargs', {}).get('max_connections')
|
|
|
|
|
+ opts.async = key, limit
|
|
|
|
|
+
|
|
|
|
|
+ # update URL and proxy
|
|
|
|
@ -1912,7 +1914,7 @@ index e090e90..30a8bdb 100644
|
|
|
|
|
+ key, limit = opts.async
|
|
|
|
|
+ if key in single:
|
|
|
|
|
+ limit = 1
|
|
|
|
|
+ while host_con.get(key, 0) >= limit:
|
|
|
|
|
+ while host_con.get(key, 0) >= (limit or 2):
|
|
|
|
|
+ perform()
|
|
|
|
|
+ if DEBUG:
|
|
|
|
|
+ DEBUG.info('max_connections(%s): %d/%d', key, host_con.get(key, 0), limit)
|
|
|
|
@ -2187,7 +2189,7 @@ index dad410b..988a309 100644
|
|
|
|
|
def urlopen(self, url, **kwargs):
|
|
|
|
|
kw = dict(kwargs)
|
|
|
|
|
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
|
|
|
|
|
index dd07c6a..b456a0c 100644
|
|
|
|
|
index dd07c6a..5d148f0 100644
|
|
|
|
|
--- a/urlgrabber/progress.py
|
|
|
|
|
+++ b/urlgrabber/progress.py
|
|
|
|
|
@@ -133,8 +133,8 @@ class BaseMeter:
|
|
|
|
@ -2512,7 +2514,24 @@ index dd07c6a..b456a0c 100644
|
|
|
|
|
|
|
|
|
|
######################################################################
|
|
|
|
|
# support classes and functions
|
|
|
|
|
@@ -658,6 +741,8 @@ def format_time(seconds, use_hours=0):
|
|
|
|
|
@@ -563,10 +646,14 @@ class RateEstimator:
|
|
|
|
|
|
|
|
|
|
def update(self, amount_read, now=None):
|
|
|
|
|
if now is None: now = time.time()
|
|
|
|
|
- if amount_read == 0:
|
|
|
|
|
+ # libcurl calls the progress callback when fetching headers
|
|
|
|
|
+ # too, thus amount_read = 0 .. hdr_size .. 0 .. content_size.
|
|
|
|
|
+ # Occasionally we miss the 2nd zero and report avg speed < 0.
|
|
|
|
|
+ # Handle read_diff < 0 here. BZ 1001767.
|
|
|
|
|
+ if amount_read == 0 or amount_read < self.last_amount_read:
|
|
|
|
|
# if we just started this file, all bets are off
|
|
|
|
|
self.last_update_time = now
|
|
|
|
|
- self.last_amount_read = 0
|
|
|
|
|
+ self.last_amount_read = amount_read
|
|
|
|
|
self.ave_rate = None
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
@@ -658,6 +745,8 @@ def format_time(seconds, use_hours=0):
|
|
|
|
|
if seconds is None or seconds < 0:
|
|
|
|
|
if use_hours: return '--:--:--'
|
|
|
|
|
else: return '--:--'
|
|
|
|
@ -2521,7 +2540,7 @@ index dd07c6a..b456a0c 100644
|
|
|
|
|
else:
|
|
|
|
|
seconds = int(seconds)
|
|
|
|
|
minutes = seconds / 60
|
|
|
|
|
@@ -722,9 +807,77 @@ def _tst(fn, cur, tot, beg, size, *args):
|
|
|
|
|
@@ -722,9 +811,77 @@ def _tst(fn, cur, tot, beg, size, *args):
|
|
|
|
|
time.sleep(delay)
|
|
|
|
|
tm.end(size)
|
|
|
|
|
|
|
|
|
|