Merge branch 'f19'

epel9
Zdenek Pavlas 12 years ago
commit 4b9511117b

@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 26%{?dist}
+Release: 28%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,13 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
* Fri May 17 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-28
- Update to latest HEAD.
- Add the "minrate" option. BZ 964298
- Workaround progress "!!!" end for file:// repos.
- add URLGrabError.code to the external downloader API
- Disable GSSNEGOTIATE to work around a curl bug. BZ 960163
* Wed Mar 27 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-26
- Update to latest HEAD.
- Handle HTTP 200 response to range requests correctly. BZ 919076

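The new "minrate" option from BZ 964298 is documented further down in this patch: it is a low-speed threshold in bytes per second, and if the server sends data more slowly than that for at least `timeout' seconds the transfer is aborted. A minimal usage sketch, not part of the commit (the URL and filename are placeholders):

    from urlgrabber.grabber import urlgrab, URLGrabError

    try:
        # give up if the server stays below 2000 bytes/s for 300 seconds
        urlgrab('http://example.com/repodata/primary.xml.gz',
                'primary.xml.gz', timeout=300, minrate=2000)
    except URLGrabError, e:
        print 'download failed: errno=%d %s' % (e.errno, e.strerror)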
@@ -81,7 +81,7 @@ index 518e512..07881b3 100644
try:
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
-index 0000000..3dafb12
+index 0000000..9ea0e70
--- /dev/null
+++ b/scripts/urlgrabber-ext-down
@@ -0,0 +1,75 @@
@@ -155,7 +155,7 @@ index 0000000..3dafb12
+ ug_err = 'OK'
+ except URLGrabError, e:
+ size = 0
-+ ug_err = '%d %s' % e.args
++ ug_err = '%d %d %s' % (e.errno, getattr(e, 'code', 0), e.strerror)
+ write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err)
+
+if __name__ == '__main__':
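The hunk above changes the status line urlgrabber-ext-down reports back to the parent process: the error field, previously `'%d %s' % e.args`, now carries the errno, an optional HTTP code, and the error string. A rough sketch of how the parent side can decode such a line, mirroring the parsing added to grabber.py later in this patch (the function name is illustrative):

    from urlgrabber.grabber import URLGrabError

    def parse_status_line(line):
        # '<id> <size> <dlsz> <dltm> OK' on success,
        # '<id> <size> <dlsz> <dltm> <errno> <code> <strerror>' on failure
        fields = line.rstrip('\n').split(' ', 6)
        _id, size = int(fields[0]), int(fields[1])
        dlsz, dltm = int(fields[2]), float(fields[3])
        if fields[4] == 'OK':
            return _id, size, dlsz, dltm, None
        err = URLGrabError(int(fields[4]), fields[6])
        if fields[5] != '0':
            err.code = int(fields[5])   # HTTP status, when one was seen
        return _id, size, dlsz, dltm, err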
@@ -190,7 +190,7 @@ index 50c6348..5fb43f9 100644
# set to a proftp server only. we're working around a couple of
diff --git a/test/test_mirror.py b/test/test_mirror.py
-index 70fe069..cb63a41 100644
+index 70fe069..6fdb668 100644
--- a/test/test_mirror.py
+++ b/test/test_mirror.py
@@ -28,7 +28,7 @@ import os
@@ -220,6 +220,45 @@ index 70fe069..cb63a41 100644
def urlgrab(self, url, filename=None, **kwargs):
self.calls.append( (url, filename) )
@@ -265,6 +269,38 @@ class ActionTests(TestCase):
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
+class HttpReplyCode(TestCase):
+ def setUp(self):
+ def server():
+ import socket
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ s.bind(('localhost', 2000)); s.listen(1)
+ while 1:
+ c, a = s.accept()
+ while not c.recv(4096).endswith('\r\n\r\n'): pass
+ c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
+ c.close()
+ import thread
+ self.reply = 503, "Busy"
+ thread.start_new_thread(server, ())
+
+ def failure(obj):
+ self.code = getattr(obj.exception, 'code', None)
+ return {}
+ self.g = URLGrabber()
+ self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure)
+
+ def test_grab(self):
+ self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
+ self.assertEquals(self.code, 503); del self.code
+
+ err = []
+ self.mg.urlgrab('foo', async = True, failfunc = err.append)
+ urlgrabber.grabber.parallel_wait()
+ self.assertEquals([e.exception.errno for e in err], [256])
+ self.assertEquals(self.code, 503); del self.code
+
def suite():
tl = TestLoader()
return tl.loadTestsFromModule(sys.modules[__name__])
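The HttpReplyCode test above exercises the new URLGrabError.code attribute: a MirrorGroup failure callback can now distinguish HTTP replies such as 503 from transport errors. A sketch of a callback that uses it (mirror URLs and the print format are illustrative):

    from urlgrabber.grabber import URLGrabber
    from urlgrabber.mirror import MirrorGroup

    def log_failure(obj):
        # obj.exception is the URLGrabError raised for this mirror
        code = getattr(obj.exception, 'code', None)
        print 'mirror failed: %s (HTTP %s)' % (obj.exception, code)

    mg = MirrorGroup(URLGrabber(),
                     ['http://mirror1.example/', 'http://mirror2.example/'],
                     failure_callback=log_failure)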
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
@@ -275,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..9526dc1 100644
+index e090e90..05ea9c3 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -306,12 +345,19 @@ index e090e90..9526dc1 100644
text = None
specifies alternative text to be passed to the progress meter
-@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs)
+@@ -68,14 +83,20 @@ GENERAL ARGUMENTS (kwargs)
(which can be set on default_grabber.throttle) is used. See
BANDWIDTH THROTTLING for more information.
- timeout = None
+ timeout = 300
+
+ a positive integer expressing the number of seconds to wait before
+ timing out attempts to connect to a server. If the value is None
+ or 0, connection attempts will not time out. The timeout is passed
+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
- a positive float expressing the number of seconds to wait for socket
- operations. If the value is None or 0.0, socket operations will block
@@ -319,16 +365,15 @@ index e090e90..9526dc1 100644
- method on the Socket object used for the request. See the Python
- documentation on settimeout for more information.
- http://www.python.org/doc/current/lib/socket-objects.html
-+ a positive integer expressing the number of seconds to wait before
-+ timing out attempts to connect to a server. If the value is None
-+ or 0, connection attempts will not time out. The timeout is passed
-+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
-+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
-+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
++ minrate = 1000
++
++ This sets the low speed threshold in bytes per second. If the server
++ is sending data slower than this for at least `timeout' seconds, the
++ library aborts the connection.
bandwidth = 0
-@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs)
+@@ -143,8 +164,12 @@ GENERAL ARGUMENTS (kwargs)
note that proxy authentication information may be provided using
normal URL constructs:
proxies={ 'http' : 'http://user:host@foo:3128' }
@@ -343,7 +388,7 @@ index e090e90..9526dc1 100644
prefix = None
-@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs)
+@@ -198,6 +223,12 @@ GENERAL ARGUMENTS (kwargs)
control, you should probably subclass URLParser and pass it in via
the 'urlparser' option.
@@ -356,7 +401,7 @@ index e090e90..9526dc1 100644
ssl_ca_cert = None
this option can be used if M2Crypto is available and will be
-@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs)
+@@ -211,43 +242,75 @@ GENERAL ARGUMENTS (kwargs)
No-op when using the curl backend (default)
@@ -441,7 +486,7 @@ index e090e90..9526dc1 100644
RETRY RELATED ARGUMENTS
-@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS
+@@ -328,6 +391,15 @@ RETRY RELATED ARGUMENTS
but it cannot (without severe trickiness) prevent the exception
from being raised.
@@ -457,7 +502,7 @@ index e090e90..9526dc1 100644
interrupt_callback = None
This callback is called if KeyboardInterrupt is received at any
-@@ -420,6 +486,7 @@ import time
+@@ -420,6 +492,7 @@ import time
import string
import urllib
import urllib2
@@ -465,7 +510,7 @@ index e090e90..9526dc1 100644
import mimetools
import thread
import types
-@@ -428,9 +495,17 @@ import pycurl
+@@ -428,9 +501,17 @@ import pycurl
from ftplib import parse150
from StringIO import StringIO
from httplib import HTTPException
@@ -484,7 +529,7 @@ index e090e90..9526dc1 100644
########################################################################
# MODULE INITIALIZATION
########################################################################
-@@ -439,6 +514,12 @@ try:
+@@ -439,6 +520,12 @@ try:
except:
__version__ = '???'
@@ -497,7 +542,7 @@ index e090e90..9526dc1 100644
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
-@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None):
+@@ -504,6 +591,7 @@ def _init_default_logger(logspec=None):
else: handler = logging.FileHandler(filename)
handler.setFormatter(formatter)
DBOBJ = logging.getLogger('urlgrabber')
@@ -505,7 +550,7 @@ index e090e90..9526dc1 100644
DBOBJ.addHandler(handler)
DBOBJ.setLevel(level)
except (KeyError, ImportError, ValueError):
-@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None):
+@@ -512,8 +600,8 @@ def _init_default_logger(logspec=None):
def _log_package_state():
if not DEBUG: return
@@ -516,7 +561,7 @@ index e090e90..9526dc1 100644
_init_default_logger()
_log_package_state()
-@@ -527,6 +609,29 @@ def _(st):
+@@ -527,6 +615,29 @@ def _(st):
# END MODULE INITIALIZATION
########################################################################
@@ -546,7 +591,7 @@ index e090e90..9526dc1 100644
class URLGrabError(IOError):
-@@ -662,6 +767,7 @@ class URLParser:
+@@ -662,6 +773,7 @@ class URLParser:
opts.quote = 0 --> do not quote it
opts.quote = None --> guess
"""
@@ -554,7 +599,7 @@ index e090e90..9526dc1 100644
quote = opts.quote
if opts.prefix:
-@@ -768,6 +874,41 @@ class URLGrabberOptions:
+@@ -768,6 +880,41 @@ class URLGrabberOptions:
else: # throttle is a float
return self.bandwidth * self.throttle
@@ -596,7 +641,7 @@ index e090e90..9526dc1 100644
def derive(self, **kwargs):
"""Create a derived URLGrabberOptions instance.
This method creates a new instance and overrides the
-@@ -791,30 +932,37 @@ class URLGrabberOptions:
+@@ -791,30 +938,38 @@ class URLGrabberOptions:
provided here.
"""
self.progress_obj = None
@@ -624,6 +669,7 @@ index e090e90..9526dc1 100644
self.cache_openers = True
- self.timeout = None
+ self.timeout = 300
+ self.minrate = None
self.text = None
self.http_headers = None
self.ftp_headers = None
@@ -635,7 +681,7 @@ index e090e90..9526dc1 100644
self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
self.ssl_context = None # no-op in pycurl
self.ssl_verify_peer = True # check peer's cert for authenticityb
-@@ -827,6 +975,12 @@ class URLGrabberOptions:
+@@ -827,6 +982,12 @@ class URLGrabberOptions:
self.size = None # if we know how big the thing we're getting is going
# to be. this is ultimately a MAXIMUM size for the file
self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
@@ -648,7 +694,7 @@ index e090e90..9526dc1 100644
def __repr__(self):
return self.format()
-@@ -846,7 +1000,18 @@ class URLGrabberOptions:
+@@ -846,7 +1007,18 @@ class URLGrabberOptions:
s = s + indent + '}'
return s
@@ -668,7 +714,7 @@ index e090e90..9526dc1 100644
"""Provides easy opening of URLs with a variety of options.
All options are specified as kwargs. Options may be specified when
-@@ -872,7 +1037,6 @@ class URLGrabber:
+@@ -872,7 +1044,6 @@ class URLGrabber:
# beware of infinite loops :)
tries = tries + 1
exception = None
@@ -676,7 +722,7 @@ index e090e90..9526dc1 100644
callback = None
if DEBUG: DEBUG.info('attempt %i/%s: %s',
tries, opts.retry, args[0])
-@@ -883,54 +1047,62 @@ class URLGrabber:
+@@ -883,54 +1054,62 @@ class URLGrabber:
except URLGrabError, e:
exception = e
callback = opts.failure_callback
@@ -746,7 +792,7 @@ index e090e90..9526dc1 100644
if scheme == 'file' and not opts.copy_local:
# just return the name of the local file - don't make a
# copy currently
-@@ -950,41 +1122,51 @@ class URLGrabber:
+@@ -950,41 +1129,51 @@ class URLGrabber:
elif not opts.range:
if not opts.checkfunc is None:
@@ -813,7 +859,7 @@ index e090e90..9526dc1 100644
if limit is not None:
limit = limit + 1
-@@ -1000,12 +1182,8 @@ class URLGrabber:
+@@ -1000,12 +1189,8 @@ class URLGrabber:
else: s = fo.read(limit)
if not opts.checkfunc is None:
@@ -828,7 +874,7 @@ index e090e90..9526dc1 100644
finally:
fo.close()
return s
-@@ -1020,6 +1198,7 @@ class URLGrabber:
+@@ -1020,6 +1205,7 @@ class URLGrabber:
return s
def _make_callback(self, callback_obj):
@@ -836,7 +882,7 index e090e90..9526dc1 100644
if callable(callback_obj):
return callback_obj, (), {}
else:
-@@ -1030,7 +1209,7 @@ class URLGrabber:
+@@ -1030,7 +1216,7 @@ class URLGrabber:
default_grabber = URLGrabber()
@@ -845,7 +891,7 @@ index e090e90..9526dc1 100644
def __init__(self, url, filename, opts):
self.fo = None
self._hdr_dump = ''
-@@ -1052,10 +1231,13 @@ class PyCurlFileObject():
+@@ -1052,10 +1238,13 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
@@ -861,7 +907,7 @@ index e090e90..9526dc1 100644
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
Any attribute not found in _this_ object will be searched for
-@@ -1067,6 +1249,12 @@ class PyCurlFileObject():
+@@ -1067,6 +1256,12 @@ class PyCurlFileObject():
def _retrieve(self, buf):
try:
@@ -874,7 +920,7 @@ index e090e90..9526dc1 100644
if not self._prog_running:
if self.opts.progress_obj:
size = self.size + self._reget_length
-@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
+@@ -1079,23 +1274,40 @@ class PyCurlFileObject():
self.opts.progress_obj.update(self._amount_read)
self._amount_read += len(buf)
@@ -921,7 +967,7 @@ index e090e90..9526dc1 100644
elif self.scheme in ['ftp']:
s = None
if buf.startswith('213 '):
-@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
+@@ -1104,7 +1316,18 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
@@ -941,7 +987,7 @@ index e090e90..9526dc1 100644
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
-@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1336,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -952,7 +998,7 @@ index e090e90..9526dc1 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
-@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1352,9 @@ class PyCurlFileObject():
if not opts:
opts = self.opts
@@ -962,7 +1008,7 @@ index e090e90..9526dc1 100644
# defaults we're always going to set
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1364,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -985,7 +1031,7 @@ index e090e90..9526dc1 100644
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1379,11 @@
+@@ -1148,9 +1386,11 @@
# timeouts
timeout = 300
@@ -995,12 +1041,12 @@ index e090e90..9526dc1 100644
+ if hasattr(opts, 'timeout'):
+ timeout = int(opts.timeout or 0)
+ self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
-+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000)
++ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000)
+ self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
# ssl options # ssl options
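This hunk is where `timeout' and the new `minrate' option reach libcurl: CONNECTTIMEOUT bounds connection setup, while LOW_SPEED_LIMIT together with LOW_SPEED_TIME aborts a transfer that stays below the byte-per-second threshold for the whole window. Roughly the same thing in a bare pycurl sketch (the URL is a placeholder):

    import pycurl

    c = pycurl.Curl()
    c.setopt(pycurl.URL, 'http://example.com/file')
    c.setopt(pycurl.CONNECTTIMEOUT, 300)    # connect phase only
    c.setopt(pycurl.LOW_SPEED_LIMIT, 1000)  # bytes/s, the minrate fallback
    c.setopt(pycurl.LOW_SPEED_TIME, 300)    # sustained for this many seconds
    # c.perform() would then fail with a pycurl.error on a stalled transfer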
if self.scheme == 'https': if self.scheme == 'https':
-@@ -1158,13 +1391,16 @@
+@@ -1158,13 +1398,16 @@
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@ -1018,7 +1064,7 @@ index e090e90..9526dc1 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
-@@ -1187,28 +1423,28 @@
+@@ -1187,28 +1430,26 @@
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@ -1042,11 +1088,9 @@ index e090e90..9526dc1 100644
+ # proxy
+ if opts.proxy is not None:
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
-+ auth = pycurl.HTTPAUTH_ANY
-+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
-+ # BZ 769254: work around a bug in curl < 7.28.0
-+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
-+ self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
++ self.curl_obj.setopt(pycurl.PROXYAUTH,
++ # All but Kerberos. BZ 769254
++ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
+
+ if opts.username and opts.password:
+ if self.scheme in ('http', 'https'):
@ -1065,7 +1109,7 @@ index e090e90..9526dc1 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
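The proxy-auth change above replaces the old conditional workaround with an unconditional one: the allowed-methods mask is built by subtracting the GSS-Negotiate bit from HTTPAUTH_ANY (BZ 960163). Since HTTPAUTH_ANY has that bit set, the subtraction is equivalent to clearing it, as this illustration shows:

    import pycurl

    mask = pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE
    assert mask == pycurl.HTTPAUTH_ANY & ~pycurl.HTTPAUTH_GSSNEGOTIATE
    # every auth method except GSS-Negotiate, which buggy curl versions
    # mishandle when authenticating to a proxy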
-@@ -1228,39 +1464,26 @@
+@@ -1228,39 +1469,26 @@
code = self.http_code
errcode = e.args[0]
@ -1111,7 +1155,7 @@ index e090e90..9526dc1 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
-@@ -1269,36 +1492,70 @@
+@@ -1269,36 +1497,70 @@
# figure out what aborted the pycurl process FIXME
raise KeyboardInterrupt
@ -1207,7 +1251,7 @@ index e090e90..9526dc1 100644
def _do_open(self):
self.curl_obj = _curl_cache
-@@ -1333,7 +1590,11 @@
+@@ -1333,7 +1595,11 @@
if self.opts.range:
rt = self.opts.range
@ -1220,7 +1264,7 @@ index e090e90..9526dc1 100644
if rt:
header = range_tuple_to_header(rt)
-@@ -1434,21 +1695,46 @@
+@@ -1434,21 +1700,46 @@
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@ -1274,7 +1318,7 @@ index e090e90..9526dc1 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1526,17 +1812,20 @@
+@@ -1526,17 +1817,20 @@
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@ -1300,7 +1344,7 @@ index e090e90..9526dc1 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1544,13 +1833,6 @@
+@@ -1544,13 +1838,6 @@
return True
return False
@ -1314,7 +1358,7 @@ index e090e90..9526dc1 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
-@@ -1582,9 +1864,21 @@
+@@ -1582,9 +1869,21 @@
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@ -1337,7 +1381,7 @@ index e090e90..9526dc1 100644
#####################################################################
# DEPRECATED FUNCTIONS
-@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@ -1443,7 +1487,7 @@ index e090e90..9526dc1 100644
+ # list of options we pass to downloader
+ _options = (
+ 'url', 'filename',
+ 'timeout', 'close_connection', 'keepalive', + 'timeout', 'minrate', 'close_connection', 'keepalive',
+ 'throttle', 'bandwidth', 'range', 'reget',
+ 'user_agent', 'http_headers', 'ftp_headers',
+ 'proxy', 'prefix', 'username', 'password',
@ -1478,7 +1522,7 @@ index e090e90..9526dc1 100644
+ raise KeyboardInterrupt
+ for line in lines:
+ # parse downloader output
-+ line = line.split(' ', 5)
++ line = line.split(' ', 6)
+ _id, size = map(int, line[:2])
+ if len(line) == 2:
+ self.running[_id]._progress.update(size)
@ -1489,7 +1533,9 @@ index e090e90..9526dc1 100644
+ ug_err = None
+ if DEBUG: DEBUG.info('success')
+ else:
-+ ug_err = URLGrabError(int(line[4]), line[5])
++ ug_err = URLGrabError(int(line[4]), line[6])
+ if line[5] != '0':
+ ug_err.code = int(line[5])
+ if DEBUG: DEBUG.info('failure: %s', ug_err)
+ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
+ ret.append((opts, size, ug_err))
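This is the parent-side parsing that consumes the external downloader's status line, including the new HTTP code field. For completeness, a hedged sketch of how a caller drives the async path these lines support, based on the async/failfunc API added elsewhere in this patch (URLs and the connection limit are placeholders):

    import urlgrabber.grabber
    from urlgrabber.grabber import urlgrab

    failed = []
    for url in ('http://mirror.example/a.rpm', 'http://mirror.example/b.rpm'):
        # async=(key, limit): at most `limit' parallel downloads per key
        urlgrab(url, url.split('/')[-1], async=('mirror.example', 2),
                failfunc=failed.append)
    urlgrabber.grabber.parallel_wait()
    for opts in failed:
        print 'failed:', opts.url, getattr(opts.exception, 'code', None)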
@@ -1983,7 +2029,7 @@ index dad410b..7975f1b 100644
def urlopen(self, url, **kwargs):
kw = dict(kwargs)
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
-index dd07c6a..077fd99 100644
+index dd07c6a..b456a0c 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -133,8 +133,8 @@ class BaseMeter:
@ -2009,7 +2055,7 @@ index dd07c6a..077fd99 100644
+ return tl.add(' [%-*.*s]' % (blen, blen, bar))
+
+def _term_add_end(tl, osize, size):
-+ if osize is not None:
++ if osize: # osize should be None or >0, but that's been broken.
+ if size > osize: # Is ??? better? Really need something to say < vs >.
+ return tl.add(' !!! '), True
+ elif size != osize:
