|
|
|
@ -1,5 +1,5 @@
|
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
|
|
|
index cf51dff..9692219 100644
|
|
|
|
|
index cf51dff..979b4c1 100644
|
|
|
|
|
--- a/urlgrabber/grabber.py
|
|
|
|
|
+++ b/urlgrabber/grabber.py
|
|
|
|
|
@@ -402,11 +402,11 @@ import urllib
|
|
|
|
@ -104,7 +104,7 @@ index cf51dff..9692219 100644
|
|
|
|
|
+ self.fo.write(buf)
|
|
|
|
|
+ return len(buf)
|
|
|
|
|
+ except KeyboardInterrupt:
|
|
|
|
|
+ return pycurl.READFUNC_ABORT
|
|
|
|
|
+ return -1
|
|
|
|
|
+
|
|
|
|
|
def _hdr_retrieve(self, buf):
|
|
|
|
|
- self._hdr_dump += buf
|
|
|
|
@ -199,7 +199,23 @@ index cf51dff..9692219 100644
|
|
|
|
|
|
|
|
|
|
#headers:
|
|
|
|
|
if opts.http_headers and self.scheme in ('http', 'https'):
|
|
|
|
|
@@ -1590,7 +1627,7 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1578,19 +1615,21 @@ class PyCurlFileObject():
|
|
|
|
|
if scheme not in ('ftp'):
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
+ if proxy == '_none_': proxy = ""
|
|
|
|
|
self.curl_obj.setopt(pycurl.PROXY, proxy)
|
|
|
|
|
elif self.scheme in ('http', 'https'):
|
|
|
|
|
if scheme not in ('http', 'https'):
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
+ if proxy == '_none_': proxy = ""
|
|
|
|
|
self.curl_obj.setopt(pycurl.PROXY, proxy)
|
|
|
|
|
-
|
|
|
|
|
- # username/password/auth settings
|
|
|
|
|
+
|
|
|
|
|
+ # FIXME username/password/auth settings
|
|
|
|
|
|
|
|
|
|
#posts - simple - expects the fields as they are
|
|
|
|
|
if opts.data:
|
|
|
|
|
self.curl_obj.setopt(pycurl.POST, True)
|
|
|
|
@ -208,7 +224,7 @@ index cf51dff..9692219 100644
|
|
|
|
|
|
|
|
|
|
# our url
|
|
|
|
|
self.curl_obj.setopt(pycurl.URL, self.url)
|
|
|
|
|
@@ -1607,18 +1644,51 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1607,18 +1646,62 @@ class PyCurlFileObject():
|
|
|
|
|
# to other URLGrabErrors from
|
|
|
|
|
# http://curl.haxx.se/libcurl/c/libcurl-errors.html
|
|
|
|
|
# this covers e.args[0] == 22 pretty well - which will be common
|
|
|
|
@ -234,6 +250,17 @@ index cf51dff..9692219 100644
|
|
|
|
|
+ err = URLGrabError(14, msg)
|
|
|
|
|
+ err.url = self.url
|
|
|
|
|
+ raise err
|
|
|
|
|
+
|
|
|
|
|
+ if e.args[0] == 42:
|
|
|
|
|
+ err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
|
|
|
|
|
+ err.url = self.url
|
|
|
|
|
+ # this is probably wrong but ultimately this is what happens
|
|
|
|
|
+ # we have a legit http code and a pycurl 'writer failed' code
|
|
|
|
|
+ # which almost always means something aborted it from outside
|
|
|
|
|
+ # since we cannot know what it is -I'm banking on it being
|
|
|
|
|
+ # a ctrl-c. XXXX - if there's a way of going back two raises to
|
|
|
|
|
+ # figure out what aborted the pycurl process FIXME
|
|
|
|
|
+ raise KeyboardInterrupt
|
|
|
|
|
+
|
|
|
|
|
+ elif e.args[0] == 58:
|
|
|
|
|
+ msg = _("problem with the local client certificate")
|
|
|
|
@ -266,7 +293,7 @@ index cf51dff..9692219 100644
|
|
|
|
|
self.curl_obj = _curl_cache
|
|
|
|
|
self.curl_obj.reset() # reset all old settings away, just in case
|
|
|
|
|
# setup any ranges
|
|
|
|
|
@@ -1630,11 +1700,9 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1630,11 +1713,9 @@ class PyCurlFileObject():
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
def _build_range(self):
|
|
|
|
@ -279,7 +306,7 @@ index cf51dff..9692219 100644
|
|
|
|
|
# we have reget turned on and we're dumping to a file
|
|
|
|
|
try:
|
|
|
|
|
s = os.stat(self.filename)
|
|
|
|
|
@@ -1726,10 +1794,10 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1726,10 +1807,10 @@ class PyCurlFileObject():
|
|
|
|
|
if self._complete:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
@ -292,7 +319,7 @@ index cf51dff..9692219 100644
|
|
|
|
|
if self.append: mode = 'ab'
|
|
|
|
|
else: mode = 'wb'
|
|
|
|
|
|
|
|
|
|
@@ -1746,19 +1814,23 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1746,19 +1827,23 @@ class PyCurlFileObject():
|
|
|
|
|
else:
|
|
|
|
|
self._prog_reportname = 'MEMORY'
|
|
|
|
|
self._prog_basename = 'MEMORY'
|
|
|
|
@ -321,7 +348,7 @@ index cf51dff..9692219 100644
|
|
|
|
|
# set the time
|
|
|
|
|
mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
|
|
|
|
|
if mod_time != -1:
|
|
|
|
|
@@ -1766,7 +1838,8 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1766,7 +1851,8 @@ class PyCurlFileObject():
|
|
|
|
|
# re open it
|
|
|
|
|
self.fo = open(self.filename, 'r')
|
|
|
|
|
else:
|
|
|
|
@ -331,10 +358,21 @@ index cf51dff..9692219 100644
|
|
|
|
|
|
|
|
|
|
self._complete = True
|
|
|
|
|
|
|
|
|
|
@@ -1838,6 +1911,13 @@ class PyCurlFileObject():
|
|
|
|
|
downloaded += self._reget_length
|
|
|
|
|
self.opts.progress_obj.update(downloaded)
|
|
|
|
|
@@ -1834,10 +1920,20 @@ class PyCurlFileObject():
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
def _progress_update(self, download_total, downloaded, upload_total, uploaded):
|
|
|
|
|
- if self._prog_running:
|
|
|
|
|
- downloaded += self._reget_length
|
|
|
|
|
- self.opts.progress_obj.update(downloaded)
|
|
|
|
|
-
|
|
|
|
|
+ try:
|
|
|
|
|
+ if self._prog_running:
|
|
|
|
|
+ downloaded += self._reget_length
|
|
|
|
|
+ self.opts.progress_obj.update(downloaded)
|
|
|
|
|
+ except KeyboardInterrupt:
|
|
|
|
|
+ return -1
|
|
|
|
|
+
|
|
|
|
|
+ def _to_utf8(self, obj, errors='replace'):
|
|
|
|
|
+ '''convert 'unicode' to an encoded utf-8 byte string '''
|
|
|
|
|
+ # stolen from yum.i18n
|
|
|
|
|