|
|
|
@ -275,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
|
|
|
|
|
return (fb,lb)
|
|
|
|
|
|
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
|
|
|
index e090e90..1afb2c5 100644
|
|
|
|
|
index e090e90..9526dc1 100644
|
|
|
|
|
--- a/urlgrabber/grabber.py
|
|
|
|
|
+++ b/urlgrabber/grabber.py
|
|
|
|
|
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
@ -874,7 +874,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
if not self._prog_running:
|
|
|
|
|
if self.opts.progress_obj:
|
|
|
|
|
size = self.size + self._reget_length
|
|
|
|
|
@@ -1079,15 +1267,24 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
|
|
|
|
|
self.opts.progress_obj.update(self._amount_read)
|
|
|
|
|
|
|
|
|
|
self._amount_read += len(buf)
|
|
|
|
@ -899,9 +899,29 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
- return -1
|
|
|
|
|
+ return -1
|
|
|
|
|
try:
|
|
|
|
|
self._hdr_dump += buf
|
|
|
|
|
- self._hdr_dump += buf
|
|
|
|
|
# we have to get the size before we do the progress obj start
|
|
|
|
|
@@ -1104,7 +1301,17 @@ class PyCurlFileObject():
|
|
|
|
|
# but we can't do that w/o making it do 2 connects, which sucks
|
|
|
|
|
# so we cheat and stuff it in here in the hdr_retrieve
|
|
|
|
|
- if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1:
|
|
|
|
|
- length = buf.split(':')[1]
|
|
|
|
|
- self.size = int(length)
|
|
|
|
|
+ if self.scheme in ['http','https']:
|
|
|
|
|
+ if buf.lower().find('content-length') != -1:
|
|
|
|
|
+ length = buf.split(':')[1]
|
|
|
|
|
+ self.size = int(length)
|
|
|
|
|
+ elif self.append and self._hdr_dump == '' and ' 200 ' in buf:
|
|
|
|
|
+ # reget was attempted but server sends it all
|
|
|
|
|
+ # undo what we did in _build_range()
|
|
|
|
|
+ self.append = False
|
|
|
|
|
+ self.reget_time = None
|
|
|
|
|
+ self._amount_read = 0
|
|
|
|
|
+ self._reget_length = 0
|
|
|
|
|
+ self.fo.truncate(0)
|
|
|
|
|
elif self.scheme in ['ftp']:
|
|
|
|
|
s = None
|
|
|
|
|
if buf.startswith('213 '):
|
|
|
|
|
@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
|
|
|
|
|
s = parse150(buf)
|
|
|
|
|
if s:
|
|
|
|
|
self.size = int(s)
|
|
|
|
@ -913,6 +933,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
+ self.scheme = urlparse.urlsplit(location)[0]
|
|
|
|
|
+ self.url = location
|
|
|
|
|
+
|
|
|
|
|
+ self._hdr_dump += buf
|
|
|
|
|
+ if len(self._hdr_dump) != 0 and buf == '\r\n':
|
|
|
|
|
+ self._hdr_ended = True
|
|
|
|
|
+ if DEBUG: DEBUG.debug('header ended:')
|
|
|
|
@ -920,7 +941,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
return len(buf)
|
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
|
return pycurl.READFUNC_ABORT
|
|
|
|
|
@@ -1113,8 +1320,10 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
|
|
|
|
|
if self._parsed_hdr:
|
|
|
|
|
return self._parsed_hdr
|
|
|
|
|
statusend = self._hdr_dump.find('\n')
|
|
|
|
@ -931,7 +952,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
self._parsed_hdr = mimetools.Message(hdrfp)
|
|
|
|
|
return self._parsed_hdr
|
|
|
|
|
|
|
|
|
|
@@ -1127,6 +1336,9 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
|
|
|
|
|
if not opts:
|
|
|
|
|
opts = self.opts
|
|
|
|
|
|
|
|
|
@ -941,7 +962,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
# defaults we're always going to set
|
|
|
|
|
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
|
|
|
|
|
@@ -1136,11 +1348,21 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
|
|
|
|
|
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
|
|
|
|
|
self.curl_obj.setopt(pycurl.FAILONERROR, True)
|
|
|
|
|
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
|
|
|
|
@ -964,7 +985,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
# maybe to be options later
|
|
|
|
|
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
|
|
|
|
|
@@ -1148,9 +1370,11 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1148,9 +1379,11 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
# timeouts
|
|
|
|
|
timeout = 300
|
|
|
|
@ -979,7 +1000,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
# ssl options
|
|
|
|
|
if self.scheme == 'https':
|
|
|
|
|
@@ -1158,13 +1382,16 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1158,13 +1391,16 @@ class PyCurlFileObject():
|
|
|
|
|
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
|
|
|
|
|
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
|
|
|
|
|
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
|
|
|
|
@ -997,7 +1018,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
if opts.ssl_cert_type:
|
|
|
|
|
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
|
|
|
|
|
if opts.ssl_key_pass:
|
|
|
|
|
@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1187,28 +1423,28 @@ class PyCurlFileObject():
|
|
|
|
|
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
|
|
|
|
|
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
|
|
|
|
|
|
|
|
|
@ -1044,7 +1065,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
# our url
|
|
|
|
|
self.curl_obj.setopt(pycurl.URL, self.url)
|
|
|
|
|
@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1228,39 +1464,26 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
code = self.http_code
|
|
|
|
|
errcode = e.args[0]
|
|
|
|
@ -1090,7 +1111,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
# this is probably wrong but ultimately this is what happens
|
|
|
|
|
# we have a legit http code and a pycurl 'writer failed' code
|
|
|
|
|
# which almost always means something aborted it from outside
|
|
|
|
|
@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1269,36 +1492,70 @@ class PyCurlFileObject():
|
|
|
|
|
# figure out what aborted the pycurl process FIXME
|
|
|
|
|
raise KeyboardInterrupt
|
|
|
|
|
|
|
|
|
@ -1186,14 +1207,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
def _do_open(self):
|
|
|
|
|
self.curl_obj = _curl_cache
|
|
|
|
|
- self.curl_obj.reset() # reset all old settings away, just in case
|
|
|
|
|
+ # reset() clears PYCURL_ERRORBUFFER, and there's no way
|
|
|
|
|
+ # to reinitialize it, so better don't do that. BZ 896025
|
|
|
|
|
+ #self.curl_obj.reset() # reset all old settings away, just in case
|
|
|
|
|
# setup any ranges
|
|
|
|
|
self._set_opts()
|
|
|
|
|
self._do_grab()
|
|
|
|
|
@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1333,7 +1590,11 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
if self.opts.range:
|
|
|
|
|
rt = self.opts.range
|
|
|
|
@ -1206,7 +1220,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
if rt:
|
|
|
|
|
header = range_tuple_to_header(rt)
|
|
|
|
|
@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1434,21 +1695,46 @@ class PyCurlFileObject():
|
|
|
|
|
#fh, self._temp_name = mkstemp()
|
|
|
|
|
#self.fo = open(self._temp_name, 'wb')
|
|
|
|
|
|
|
|
|
@ -1260,7 +1274,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
else:
|
|
|
|
|
#self.fo = open(self._temp_name, 'r')
|
|
|
|
|
self.fo.seek(0)
|
|
|
|
|
@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1526,17 +1812,20 @@ class PyCurlFileObject():
|
|
|
|
|
if self._prog_running:
|
|
|
|
|
downloaded += self._reget_length
|
|
|
|
|
self.opts.progress_obj.update(downloaded)
|
|
|
|
@ -1286,7 +1300,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
msg = _("Downloaded more than max size for %s: %s > %s") \
|
|
|
|
|
% (self.url, cur, max_size)
|
|
|
|
|
@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1544,13 +1833,6 @@ class PyCurlFileObject():
|
|
|
|
|
return True
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
@ -1300,7 +1314,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
def read(self, amt=None):
|
|
|
|
|
self._fill_buffer(amt)
|
|
|
|
|
if amt is None:
|
|
|
|
|
@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1582,9 +1864,21 @@ class PyCurlFileObject():
|
|
|
|
|
self.opts.progress_obj.end(self._amount_read)
|
|
|
|
|
self.fo.close()
|
|
|
|
|
|
|
|
|
@ -1323,7 +1337,7 @@ index e090e90..1afb2c5 100644
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
|
# DEPRECATED FUNCTIONS
|
|
|
|
|
@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
|