|
|
|
@ -314,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644
|
|
|
|
|
return (fb,lb)
|
|
|
|
|
|
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
|
|
|
index e090e90..37d1297 100644
|
|
|
|
|
index e090e90..05ea9c3 100644
|
|
|
|
|
--- a/urlgrabber/grabber.py
|
|
|
|
|
+++ b/urlgrabber/grabber.py
|
|
|
|
|
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
@ -345,12 +345,19 @@ index e090e90..37d1297 100644
|
|
|
|
|
text = None
|
|
|
|
|
|
|
|
|
|
specifies alternative text to be passed to the progress meter
|
|
|
|
|
@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
@@ -68,14 +83,20 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
(which can be set on default_grabber.throttle) is used. See
|
|
|
|
|
BANDWIDTH THROTTLING for more information.
|
|
|
|
|
|
|
|
|
|
- timeout = None
|
|
|
|
|
+ timeout = 300
|
|
|
|
|
+
|
|
|
|
|
+ a positive integer expressing the number of seconds to wait before
|
|
|
|
|
+ timing out attempts to connect to a server. If the value is None
|
|
|
|
|
+ or 0, connection attempts will not time out. The timeout is passed
|
|
|
|
|
+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
|
|
|
|
|
+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
|
|
|
|
|
+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
|
|
|
|
|
|
|
|
|
|
- a positive float expressing the number of seconds to wait for socket
|
|
|
|
|
- operations. If the value is None or 0.0, socket operations will block
|
|
|
|
@ -358,16 +365,15 @@ index e090e90..37d1297 100644
|
|
|
|
|
- method on the Socket object used for the request. See the Python
|
|
|
|
|
- documentation on settimeout for more information.
|
|
|
|
|
- http://www.python.org/doc/current/lib/socket-objects.html
|
|
|
|
|
+ a positive integer expressing the number of seconds to wait before
|
|
|
|
|
+ timing out attempts to connect to a server. If the value is None
|
|
|
|
|
+ or 0, connection attempts will not time out. The timeout is passed
|
|
|
|
|
+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
|
|
|
|
|
+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
|
|
|
|
|
+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
|
|
|
|
|
+ minrate = 1000
|
|
|
|
|
+
|
|
|
|
|
+ This sets the low speed threshold in bytes per second. If the server
|
|
|
|
|
+ is sending data slower than this for at least `timeout' seconds, the
|
|
|
|
|
+ library aborts the connection.
|
|
|
|
|
|
|
|
|
|
bandwidth = 0
|
|
|
|
|
|
|
|
|
|
@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
@@ -143,8 +164,12 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
note that proxy authentication information may be provided using
|
|
|
|
|
normal URL constructs:
|
|
|
|
|
proxies={ 'http' : 'http://user:host@foo:3128' }
|
|
|
|
@ -382,7 +388,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
prefix = None
|
|
|
|
|
|
|
|
|
|
@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
@@ -198,6 +223,12 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
control, you should probably subclass URLParser and pass it in via
|
|
|
|
|
the 'urlparser' option.
|
|
|
|
|
|
|
|
|
@ -395,7 +401,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
ssl_ca_cert = None
|
|
|
|
|
|
|
|
|
|
this option can be used if M2Crypto is available and will be
|
|
|
|
|
@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
@@ -211,43 +242,75 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
|
No-op when using the curl backend (default)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -480,7 +486,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
RETRY RELATED ARGUMENTS
|
|
|
|
|
|
|
|
|
|
@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS
|
|
|
|
|
@@ -328,6 +391,15 @@ RETRY RELATED ARGUMENTS
|
|
|
|
|
but it cannot (without severe trickiness) prevent the exception
|
|
|
|
|
from being raised.
|
|
|
|
|
|
|
|
|
@ -496,7 +502,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
interrupt_callback = None
|
|
|
|
|
|
|
|
|
|
This callback is called if KeyboardInterrupt is received at any
|
|
|
|
|
@@ -420,6 +486,7 @@ import time
|
|
|
|
|
@@ -420,6 +492,7 @@ import time
|
|
|
|
|
import string
|
|
|
|
|
import urllib
|
|
|
|
|
import urllib2
|
|
|
|
@ -504,7 +510,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
import mimetools
|
|
|
|
|
import thread
|
|
|
|
|
import types
|
|
|
|
|
@@ -428,9 +495,17 @@ import pycurl
|
|
|
|
|
@@ -428,9 +501,17 @@ import pycurl
|
|
|
|
|
from ftplib import parse150
|
|
|
|
|
from StringIO import StringIO
|
|
|
|
|
from httplib import HTTPException
|
|
|
|
@ -523,7 +529,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
########################################################################
|
|
|
|
|
# MODULE INITIALIZATION
|
|
|
|
|
########################################################################
|
|
|
|
|
@@ -439,6 +514,12 @@ try:
|
|
|
|
|
@@ -439,6 +520,12 @@ try:
|
|
|
|
|
except:
|
|
|
|
|
__version__ = '???'
|
|
|
|
|
|
|
|
|
@ -536,7 +542,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
########################################################################
|
|
|
|
|
# functions for debugging output. These functions are here because they
|
|
|
|
|
# are also part of the module initialization.
|
|
|
|
|
@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None):
|
|
|
|
|
@@ -504,6 +591,7 @@ def _init_default_logger(logspec=None):
|
|
|
|
|
else: handler = logging.FileHandler(filename)
|
|
|
|
|
handler.setFormatter(formatter)
|
|
|
|
|
DBOBJ = logging.getLogger('urlgrabber')
|
|
|
|
@ -544,7 +550,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
DBOBJ.addHandler(handler)
|
|
|
|
|
DBOBJ.setLevel(level)
|
|
|
|
|
except (KeyError, ImportError, ValueError):
|
|
|
|
|
@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None):
|
|
|
|
|
@@ -512,8 +600,8 @@ def _init_default_logger(logspec=None):
|
|
|
|
|
|
|
|
|
|
def _log_package_state():
|
|
|
|
|
if not DEBUG: return
|
|
|
|
@ -555,7 +561,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
_init_default_logger()
|
|
|
|
|
_log_package_state()
|
|
|
|
|
@@ -527,6 +609,29 @@ def _(st):
|
|
|
|
|
@@ -527,6 +615,29 @@ def _(st):
|
|
|
|
|
# END MODULE INITIALIZATION
|
|
|
|
|
########################################################################
|
|
|
|
|
|
|
|
|
@ -585,7 +591,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class URLGrabError(IOError):
|
|
|
|
|
@@ -662,6 +767,7 @@ class URLParser:
|
|
|
|
|
@@ -662,6 +773,7 @@ class URLParser:
|
|
|
|
|
opts.quote = 0 --> do not quote it
|
|
|
|
|
opts.quote = None --> guess
|
|
|
|
|
"""
|
|
|
|
@ -593,7 +599,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
quote = opts.quote
|
|
|
|
|
|
|
|
|
|
if opts.prefix:
|
|
|
|
|
@@ -768,6 +874,41 @@ class URLGrabberOptions:
|
|
|
|
|
@@ -768,6 +880,41 @@ class URLGrabberOptions:
|
|
|
|
|
else: # throttle is a float
|
|
|
|
|
return self.bandwidth * self.throttle
|
|
|
|
|
|
|
|
|
@ -635,7 +641,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
def derive(self, **kwargs):
|
|
|
|
|
"""Create a derived URLGrabberOptions instance.
|
|
|
|
|
This method creates a new instance and overrides the
|
|
|
|
|
@@ -791,30 +932,37 @@ class URLGrabberOptions:
|
|
|
|
|
@@ -791,30 +938,38 @@ class URLGrabberOptions:
|
|
|
|
|
provided here.
|
|
|
|
|
"""
|
|
|
|
|
self.progress_obj = None
|
|
|
|
@ -663,6 +669,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
self.cache_openers = True
|
|
|
|
|
- self.timeout = None
|
|
|
|
|
+ self.timeout = 300
|
|
|
|
|
+ self.minrate = None
|
|
|
|
|
self.text = None
|
|
|
|
|
self.http_headers = None
|
|
|
|
|
self.ftp_headers = None
|
|
|
|
@ -674,7 +681,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
|
|
|
|
|
self.ssl_context = None # no-op in pycurl
|
|
|
|
|
self.ssl_verify_peer = True # check peer's cert for authenticityb
|
|
|
|
|
@@ -827,6 +975,12 @@ class URLGrabberOptions:
|
|
|
|
|
@@ -827,6 +982,12 @@ class URLGrabberOptions:
|
|
|
|
|
self.size = None # if we know how big the thing we're getting is going
|
|
|
|
|
# to be. this is ultimately a MAXIMUM size for the file
|
|
|
|
|
self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
|
|
|
|
@ -687,7 +694,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
|
return self.format()
|
|
|
|
|
@@ -846,7 +1000,18 @@ class URLGrabberOptions:
|
|
|
|
|
@@ -846,7 +1007,18 @@ class URLGrabberOptions:
|
|
|
|
|
s = s + indent + '}'
|
|
|
|
|
return s
|
|
|
|
|
|
|
|
|
@ -707,7 +714,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
"""Provides easy opening of URLs with a variety of options.
|
|
|
|
|
|
|
|
|
|
All options are specified as kwargs. Options may be specified when
|
|
|
|
|
@@ -872,7 +1037,6 @@ class URLGrabber:
|
|
|
|
|
@@ -872,7 +1044,6 @@ class URLGrabber:
|
|
|
|
|
# beware of infinite loops :)
|
|
|
|
|
tries = tries + 1
|
|
|
|
|
exception = None
|
|
|
|
@ -715,7 +722,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
callback = None
|
|
|
|
|
if DEBUG: DEBUG.info('attempt %i/%s: %s',
|
|
|
|
|
tries, opts.retry, args[0])
|
|
|
|
|
@@ -883,54 +1047,62 @@ class URLGrabber:
|
|
|
|
|
@@ -883,54 +1054,62 @@ class URLGrabber:
|
|
|
|
|
except URLGrabError, e:
|
|
|
|
|
exception = e
|
|
|
|
|
callback = opts.failure_callback
|
|
|
|
@ -785,7 +792,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
if scheme == 'file' and not opts.copy_local:
|
|
|
|
|
# just return the name of the local file - don't make a
|
|
|
|
|
# copy currently
|
|
|
|
|
@@ -950,41 +1122,51 @@ class URLGrabber:
|
|
|
|
|
@@ -950,41 +1129,51 @@ class URLGrabber:
|
|
|
|
|
|
|
|
|
|
elif not opts.range:
|
|
|
|
|
if not opts.checkfunc is None:
|
|
|
|
@ -852,7 +859,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
if limit is not None:
|
|
|
|
|
limit = limit + 1
|
|
|
|
|
|
|
|
|
|
@@ -1000,12 +1182,8 @@ class URLGrabber:
|
|
|
|
|
@@ -1000,12 +1189,8 @@ class URLGrabber:
|
|
|
|
|
else: s = fo.read(limit)
|
|
|
|
|
|
|
|
|
|
if not opts.checkfunc is None:
|
|
|
|
@ -867,7 +874,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
finally:
|
|
|
|
|
fo.close()
|
|
|
|
|
return s
|
|
|
|
|
@@ -1020,6 +1198,7 @@ class URLGrabber:
|
|
|
|
|
@@ -1020,6 +1205,7 @@ class URLGrabber:
|
|
|
|
|
return s
|
|
|
|
|
|
|
|
|
|
def _make_callback(self, callback_obj):
|
|
|
|
@ -875,7 +882,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
if callable(callback_obj):
|
|
|
|
|
return callback_obj, (), {}
|
|
|
|
|
else:
|
|
|
|
|
@@ -1030,7 +1209,7 @@ class URLGrabber:
|
|
|
|
|
@@ -1030,7 +1216,7 @@ class URLGrabber:
|
|
|
|
|
default_grabber = URLGrabber()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -884,7 +891,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
def __init__(self, url, filename, opts):
|
|
|
|
|
self.fo = None
|
|
|
|
|
self._hdr_dump = ''
|
|
|
|
|
@@ -1052,10 +1231,13 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1052,10 +1238,13 @@ class PyCurlFileObject():
|
|
|
|
|
self._reget_length = 0
|
|
|
|
|
self._prog_running = False
|
|
|
|
|
self._error = (None, None)
|
|
|
|
@ -900,7 +907,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
def __getattr__(self, name):
|
|
|
|
|
"""This effectively allows us to wrap at the instance level.
|
|
|
|
|
Any attribute not found in _this_ object will be searched for
|
|
|
|
|
@@ -1067,6 +1249,12 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1067,6 +1256,12 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
def _retrieve(self, buf):
|
|
|
|
|
try:
|
|
|
|
@ -913,7 +920,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
if not self._prog_running:
|
|
|
|
|
if self.opts.progress_obj:
|
|
|
|
|
size = self.size + self._reget_length
|
|
|
|
|
@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1079,23 +1274,40 @@ class PyCurlFileObject():
|
|
|
|
|
self.opts.progress_obj.update(self._amount_read)
|
|
|
|
|
|
|
|
|
|
self._amount_read += len(buf)
|
|
|
|
@ -960,7 +967,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
elif self.scheme in ['ftp']:
|
|
|
|
|
s = None
|
|
|
|
|
if buf.startswith('213 '):
|
|
|
|
|
@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1104,7 +1316,18 @@ class PyCurlFileObject():
|
|
|
|
|
s = parse150(buf)
|
|
|
|
|
if s:
|
|
|
|
|
self.size = int(s)
|
|
|
|
@ -980,7 +987,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
return len(buf)
|
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
|
return pycurl.READFUNC_ABORT
|
|
|
|
|
@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1113,8 +1336,10 @@ class PyCurlFileObject():
|
|
|
|
|
if self._parsed_hdr:
|
|
|
|
|
return self._parsed_hdr
|
|
|
|
|
statusend = self._hdr_dump.find('\n')
|
|
|
|
@ -991,7 +998,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
self._parsed_hdr = mimetools.Message(hdrfp)
|
|
|
|
|
return self._parsed_hdr
|
|
|
|
|
|
|
|
|
|
@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1127,6 +1352,9 @@ class PyCurlFileObject():
|
|
|
|
|
if not opts:
|
|
|
|
|
opts = self.opts
|
|
|
|
|
|
|
|
|
@ -1001,7 +1008,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
# defaults we're always going to set
|
|
|
|
|
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
|
|
|
|
|
@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1136,11 +1364,21 @@ class PyCurlFileObject():
|
|
|
|
|
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
|
|
|
|
|
self.curl_obj.setopt(pycurl.FAILONERROR, True)
|
|
|
|
|
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
|
|
|
|
@ -1024,7 +1031,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
# maybe to be options later
|
|
|
|
|
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
|
|
|
|
|
@@ -1148,9 +1379,11 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1148,9 +1386,11 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
# timeouts
|
|
|
|
|
timeout = 300
|
|
|
|
@ -1034,12 +1041,12 @@ index e090e90..37d1297 100644
|
|
|
|
|
+ if hasattr(opts, 'timeout'):
|
|
|
|
|
+ timeout = int(opts.timeout or 0)
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000)
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000)
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
|
|
|
|
|
|
|
|
|
|
# ssl options
|
|
|
|
|
if self.scheme == 'https':
|
|
|
|
|
@@ -1158,13 +1391,16 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1158,13 +1398,16 @@ class PyCurlFileObject():
|
|
|
|
|
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
|
|
|
|
|
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
|
|
|
|
|
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
|
|
|
|
@ -1057,7 +1064,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
if opts.ssl_cert_type:
|
|
|
|
|
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
|
|
|
|
|
if opts.ssl_key_pass:
|
|
|
|
|
@@ -1187,28 +1423,26 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1187,28 +1430,26 @@ class PyCurlFileObject():
|
|
|
|
|
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
|
|
|
|
|
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
|
|
|
|
|
|
|
|
|
@ -1102,7 +1109,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
# our url
|
|
|
|
|
self.curl_obj.setopt(pycurl.URL, self.url)
|
|
|
|
|
@@ -1228,39 +1462,26 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1228,39 +1469,26 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
code = self.http_code
|
|
|
|
|
errcode = e.args[0]
|
|
|
|
@ -1148,7 +1155,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
# this is probably wrong but ultimately this is what happens
|
|
|
|
|
# we have a legit http code and a pycurl 'writer failed' code
|
|
|
|
|
# which almost always means something aborted it from outside
|
|
|
|
|
@@ -1269,36 +1490,70 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1269,36 +1497,70 @@ class PyCurlFileObject():
|
|
|
|
|
# figure out what aborted the pycurl process FIXME
|
|
|
|
|
raise KeyboardInterrupt
|
|
|
|
|
|
|
|
|
@ -1244,7 +1251,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
def _do_open(self):
|
|
|
|
|
self.curl_obj = _curl_cache
|
|
|
|
|
@@ -1333,7 +1588,11 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1333,7 +1595,11 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
if self.opts.range:
|
|
|
|
|
rt = self.opts.range
|
|
|
|
@ -1257,7 +1264,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
if rt:
|
|
|
|
|
header = range_tuple_to_header(rt)
|
|
|
|
|
@@ -1434,21 +1693,46 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1434,21 +1700,46 @@ class PyCurlFileObject():
|
|
|
|
|
#fh, self._temp_name = mkstemp()
|
|
|
|
|
#self.fo = open(self._temp_name, 'wb')
|
|
|
|
|
|
|
|
|
@ -1311,7 +1318,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
else:
|
|
|
|
|
#self.fo = open(self._temp_name, 'r')
|
|
|
|
|
self.fo.seek(0)
|
|
|
|
|
@@ -1526,17 +1810,20 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1526,17 +1817,20 @@ class PyCurlFileObject():
|
|
|
|
|
if self._prog_running:
|
|
|
|
|
downloaded += self._reget_length
|
|
|
|
|
self.opts.progress_obj.update(downloaded)
|
|
|
|
@ -1337,7 +1344,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
msg = _("Downloaded more than max size for %s: %s > %s") \
|
|
|
|
|
% (self.url, cur, max_size)
|
|
|
|
|
@@ -1544,13 +1831,6 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1544,13 +1838,6 @@ class PyCurlFileObject():
|
|
|
|
|
return True
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
@ -1351,7 +1358,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
def read(self, amt=None):
|
|
|
|
|
self._fill_buffer(amt)
|
|
|
|
|
if amt is None:
|
|
|
|
|
@@ -1582,9 +1862,21 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1582,9 +1869,21 @@ class PyCurlFileObject():
|
|
|
|
|
self.opts.progress_obj.end(self._amount_read)
|
|
|
|
|
self.fo.close()
|
|
|
|
|
|
|
|
|
@ -1374,7 +1381,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
|
# DEPRECATED FUNCTIONS
|
|
|
|
|
@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
@ -1480,7 +1487,7 @@ index e090e90..37d1297 100644
|
|
|
|
|
+ # list of options we pass to downloader
|
|
|
|
|
+ _options = (
|
|
|
|
|
+ 'url', 'filename',
|
|
|
|
|
+ 'timeout', 'close_connection', 'keepalive',
|
|
|
|
|
+ 'timeout', 'minrate', 'close_connection', 'keepalive',
|
|
|
|
|
+ 'throttle', 'bandwidth', 'range', 'reget',
|
|
|
|
|
+ 'user_agent', 'http_headers', 'ftp_headers',
|
|
|
|
|
+ 'proxy', 'prefix', 'username', 'password',
|
|
|
|
@ -2022,7 +2029,7 @@ index dad410b..7975f1b 100644
|
|
|
|
|
def urlopen(self, url, **kwargs):
|
|
|
|
|
kw = dict(kwargs)
|
|
|
|
|
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
|
|
|
|
|
index dd07c6a..077fd99 100644
|
|
|
|
|
index dd07c6a..b456a0c 100644
|
|
|
|
|
--- a/urlgrabber/progress.py
|
|
|
|
|
+++ b/urlgrabber/progress.py
|
|
|
|
|
@@ -133,8 +133,8 @@ class BaseMeter:
|
|
|
|
@ -2048,7 +2055,7 @@ index dd07c6a..077fd99 100644
|
|
|
|
|
+ return tl.add(' [%-*.*s]' % (blen, blen, bar))
|
|
|
|
|
+
|
|
|
|
|
+def _term_add_end(tl, osize, size):
|
|
|
|
|
+ if osize is not None:
|
|
|
|
|
+ if osize: # osize should be None or >0, but that's been broken.
|
|
|
|
|
+ if size > osize: # Is ??? better? Really need something to say < vs >.
|
|
|
|
|
+ return tl.add(' !!! '), True
|
|
|
|
|
+ elif size != osize:
|
|
|
|
|