Merge branch 'f19'

epel9
Zdenek Pavlas 12 years ago
commit 4b9511117b

@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
Release: 26%{?dist}
Release: 28%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,13 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
* Fri May 17 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-28
- Update to latest HEAD.
- Add the "minrate" option. BZ 964298
- Work around the progress "!!!" end for file:// repos.
- Add URLGrabError.code to the external downloader API
- Disable GSSNEGOTIATE to work around a curl bug. BZ 960163
* Wed Mar 27 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-26
- Update to latest HEAD.
- Handle HTTP 200 response to range requests correctly. BZ 919076
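Note on usage: both "minrate" and "timeout" are ordinary URLGrabber/URLGrabberOptions keyword arguments, so the new option can be set per grabber or per call. A minimal sketch against this build (URL and values hypothetical):

    from urlgrabber.grabber import URLGrabber

    # abort transfers that stay below 500 bytes/s for `timeout' seconds
    g = URLGrabber(timeout=30, minrate=500)
    g.urlgrab('http://example.com/repo/some.rpm', 'some.rpm')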

@@ -81,7 +81,7 @@ index 518e512..07881b3 100644
try:
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
index 0000000..3dafb12
index 0000000..9ea0e70
--- /dev/null
+++ b/scripts/urlgrabber-ext-down
@@ -0,0 +1,75 @@
@@ -155,7 +155,7 @@ index 0000000..3dafb12
+                ug_err = 'OK'
+            except URLGrabError, e:
+                size = 0
+                ug_err = '%d %s' % e.args
+                ug_err = '%d %d %s' % (e.errno, getattr(e, 'code', 0), e.strerror)
+            write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err)
+
+if __name__ == '__main__':
@@ -190,7 +190,7 @@ index 50c6348..5fb43f9 100644
# set to a proftp server only. we're working around a couple of
diff --git a/test/test_mirror.py b/test/test_mirror.py
index 70fe069..cb63a41 100644
index 70fe069..6fdb668 100644
--- a/test/test_mirror.py
+++ b/test/test_mirror.py
@@ -28,7 +28,7 @@ import os
@@ -220,6 +220,45 @@ index 70fe069..cb63a41 100644
def urlgrab(self, url, filename=None, **kwargs):
self.calls.append( (url, filename) )
@@ -265,6 +269,38 @@ class ActionTests(TestCase):
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
+class HttpReplyCode(TestCase):
+    def setUp(self):
+        def server():
+            import socket
+            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            s.bind(('localhost', 2000)); s.listen(1)
+            while 1:
+                c, a = s.accept()
+                while not c.recv(4096).endswith('\r\n\r\n'): pass
+                c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
+                c.close()
+        import thread
+        self.reply = 503, "Busy"
+        thread.start_new_thread(server, ())
+
+        def failure(obj):
+            self.code = getattr(obj.exception, 'code', None)
+            return {}
+        self.g = URLGrabber()
+        self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure)
+
+    def test_grab(self):
+        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
+        self.assertEquals(self.code, 503); del self.code
+
+        err = []
+        self.mg.urlgrab('foo', async = True, failfunc = err.append)
+        urlgrabber.grabber.parallel_wait()
+        self.assertEquals([e.exception.errno for e in err], [256])
+        self.assertEquals(self.code, 503); del self.code
+
def suite():
tl = TestLoader()
return tl.loadTestsFromModule(sys.modules[__name__])
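The HttpReplyCode test above also documents the new callback contract: the object passed to a MirrorGroup failure_callback carries the URLGrabError as .exception, and .code holds the HTTP status when one was seen. A sketch of a client-side callback in the same spirit (mirror URLs hypothetical):

    from urlgrabber.grabber import URLGrabber
    from urlgrabber.mirror import MirrorGroup

    def failure(obj):
        # obj.exception is the URLGrabError for the failed mirror;
        # .code is only set for HTTP-level failures
        print 'mirror failed, HTTP status %s' % getattr(obj.exception, 'code', None)
        return {}  # empty action dict keeps the default failover behaviour

    mg = MirrorGroup(URLGrabber(), ['http://mirror1.example/', 'http://mirror2.example/'],
                     failure_callback = failure)
    mg.urlgrab('repodata/repomd.xml', 'repomd.xml')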
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
@@ -275,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index e090e90..9526dc1 100644
index e090e90..05ea9c3 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -306,12 +345,19 @@ index e090e90..9526dc1 100644
text = None
specifies alternative text to be passed to the progress meter
@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs)
@@ -68,14 +83,20 @@ GENERAL ARGUMENTS (kwargs)
(which can be set on default_grabber.throttle) is used. See
BANDWIDTH THROTTLING for more information.
- timeout = None
+ timeout = 300
+
+ a positive integer expressing the number of seconds to wait before
+ timing out attempts to connect to a server. If the value is None
+ or 0, connection attempts will not time out. The timeout is passed
+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
- a positive float expressing the number of seconds to wait for socket
- operations. If the value is None or 0.0, socket operations will block
@@ -319,16 +365,15 @@ index e090e90..9526dc1 100644
- method on the Socket object used for the request. See the Python
- documentation on settimeout for more information.
- http://www.python.org/doc/current/lib/socket-objects.html
+ a positive integer expressing the number of seconds to wait before
+ timing out attempts to connect to a server. If the value is None
+ or 0, connection attempts will not time out. The timeout is passed
+ to the underlying pycurl object as its CONNECTTIMEOUT option, see
+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
+ minrate = 1000
+
+ This sets the low speed threshold in bytes per second. If the server
+ is sending data slower than this for at least `timeout' seconds, the
+ library aborts the connection.
bandwidth = 0
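The rewritten text matches what the curl backend does below: timeout is handed to CURLOPT_CONNECTTIMEOUT, and together with minrate it drives libcurl's low-speed abort. Expressed as a standalone pycurl sketch (values are the documented defaults, not part of the patch):

    import pycurl

    c = pycurl.Curl()
    c.setopt(pycurl.URL, 'http://example.com/big.iso')
    c.setopt(pycurl.CONNECTTIMEOUT, 300)    # the `timeout' option
    c.setopt(pycurl.LOW_SPEED_LIMIT, 1000)  # the `minrate' default, bytes/s
    c.setopt(pycurl.LOW_SPEED_TIME, 300)    # give up after `timeout' slow seconds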
@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs)
@@ -143,8 +164,12 @@ GENERAL ARGUMENTS (kwargs)
note that proxy authentication information may be provided using
normal URL constructs:
proxies={ 'http' : 'http://user:host@foo:3128' }
@@ -343,7 +388,7 @@ index e090e90..9526dc1 100644
prefix = None
@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs)
@@ -198,6 +223,12 @@ GENERAL ARGUMENTS (kwargs)
control, you should probably subclass URLParser and pass it in via
the 'urlparser' option.
@@ -356,7 +401,7 @@ index e090e90..9526dc1 100644
ssl_ca_cert = None
this option can be used if M2Crypto is available and will be
@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs)
@@ -211,43 +242,75 @@ GENERAL ARGUMENTS (kwargs)
No-op when using the curl backend (default)
@@ -441,7 +486,7 @@ index e090e90..9526dc1 100644
RETRY RELATED ARGUMENTS
@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS
@@ -328,6 +391,15 @@ RETRY RELATED ARGUMENTS
but it cannot (without severe trickiness) prevent the exception
from being raised.
@@ -457,7 +502,7 @@ index e090e90..9526dc1 100644
interrupt_callback = None
This callback is called if KeyboardInterrupt is received at any
@@ -420,6 +486,7 @@ import time
@@ -420,6 +492,7 @@ import time
import string
import urllib
import urllib2
@@ -465,7 +510,7 @@ index e090e90..9526dc1 100644
import mimetools
import thread
import types
@@ -428,9 +495,17 @@ import pycurl
@@ -428,9 +501,17 @@ import pycurl
from ftplib import parse150
from StringIO import StringIO
from httplib import HTTPException
@@ -484,7 +529,7 @@ index e090e90..9526dc1 100644
########################################################################
# MODULE INITIALIZATION
########################################################################
@@ -439,6 +514,12 @@ try:
@@ -439,6 +520,12 @@ try:
except:
__version__ = '???'
@@ -497,7 +542,7 @@ index e090e90..9526dc1 100644
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None):
@@ -504,6 +591,7 @@ def _init_default_logger(logspec=None):
else: handler = logging.FileHandler(filename)
handler.setFormatter(formatter)
DBOBJ = logging.getLogger('urlgrabber')
@@ -505,7 +550,7 @@ index e090e90..9526dc1 100644
DBOBJ.addHandler(handler)
DBOBJ.setLevel(level)
except (KeyError, ImportError, ValueError):
@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None):
@@ -512,8 +600,8 @@ def _init_default_logger(logspec=None):
def _log_package_state():
if not DEBUG: return
@@ -516,7 +561,7 @@ index e090e90..9526dc1 100644
_init_default_logger()
_log_package_state()
@@ -527,6 +609,29 @@ def _(st):
@@ -527,6 +615,29 @@ def _(st):
# END MODULE INITIALIZATION
########################################################################
@@ -546,7 +591,7 @@ index e090e90..9526dc1 100644
class URLGrabError(IOError):
@@ -662,6 +767,7 @@ class URLParser:
@@ -662,6 +773,7 @@ class URLParser:
opts.quote = 0 --> do not quote it
opts.quote = None --> guess
"""
@@ -554,7 +599,7 @@ index e090e90..9526dc1 100644
quote = opts.quote
if opts.prefix:
@@ -768,6 +874,41 @@ class URLGrabberOptions:
@@ -768,6 +880,41 @@ class URLGrabberOptions:
else: # throttle is a float
return self.bandwidth * self.throttle
@@ -596,7 +641,7 @@ index e090e90..9526dc1 100644
def derive(self, **kwargs):
"""Create a derived URLGrabberOptions instance.
This method creates a new instance and overrides the
@@ -791,30 +932,37 @@ class URLGrabberOptions:
@@ -791,30 +938,38 @@ class URLGrabberOptions:
provided here.
"""
self.progress_obj = None
@ -624,6 +669,7 @@ index e090e90..9526dc1 100644
self.cache_openers = True
- self.timeout = None
+ self.timeout = 300
+ self.minrate = None
self.text = None
self.http_headers = None
self.ftp_headers = None
@@ -635,7 +681,7 @@ index e090e90..9526dc1 100644
self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
self.ssl_context = None # no-op in pycurl
self.ssl_verify_peer = True # check peer's cert for authenticity
@@ -827,6 +975,12 @@ class URLGrabberOptions:
@@ -827,6 +982,12 @@ class URLGrabberOptions:
self.size = None # if we know how big the thing we're getting is going
# to be. this is ultimately a MAXIMUM size for the file
self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
@@ -648,7 +694,7 @@ index e090e90..9526dc1 100644
def __repr__(self):
return self.format()
@@ -846,7 +1000,18 @@ class URLGrabberOptions:
@@ -846,7 +1007,18 @@ class URLGrabberOptions:
s = s + indent + '}'
return s
@@ -668,7 +714,7 @@ index e090e90..9526dc1 100644
"""Provides easy opening of URLs with a variety of options.
All options are specified as kwargs. Options may be specified when
@@ -872,7 +1037,6 @@ class URLGrabber:
@@ -872,7 +1044,6 @@ class URLGrabber:
# beware of infinite loops :)
tries = tries + 1
exception = None
@@ -676,7 +722,7 @@ index e090e90..9526dc1 100644
callback = None
if DEBUG: DEBUG.info('attempt %i/%s: %s',
tries, opts.retry, args[0])
@@ -883,54 +1047,62 @@ class URLGrabber:
@@ -883,54 +1054,62 @@ class URLGrabber:
except URLGrabError, e:
exception = e
callback = opts.failure_callback
@@ -746,7 +792,7 @@ index e090e90..9526dc1 100644
if scheme == 'file' and not opts.copy_local:
# just return the name of the local file - don't make a
# copy currently
@@ -950,41 +1122,51 @@ class URLGrabber:
@@ -950,41 +1129,51 @@ class URLGrabber:
elif not opts.range:
if not opts.checkfunc is None:
@@ -813,7 +859,7 @@ index e090e90..9526dc1 100644
if limit is not None:
limit = limit + 1
@@ -1000,12 +1182,8 @@ class URLGrabber:
@@ -1000,12 +1189,8 @@ class URLGrabber:
else: s = fo.read(limit)
if not opts.checkfunc is None:
@@ -828,7 +874,7 @@ index e090e90..9526dc1 100644
finally:
fo.close()
return s
@@ -1020,6 +1198,7 @@ class URLGrabber:
@@ -1020,6 +1205,7 @@ class URLGrabber:
return s
def _make_callback(self, callback_obj):
@@ -836,7 +882,7 @@ index e090e90..9526dc1 100644
if callable(callback_obj):
return callback_obj, (), {}
else:
@@ -1030,7 +1209,7 @@ class URLGrabber:
@@ -1030,7 +1216,7 @@ class URLGrabber:
default_grabber = URLGrabber()
@@ -845,7 +891,7 @@ index e090e90..9526dc1 100644
def __init__(self, url, filename, opts):
self.fo = None
self._hdr_dump = ''
@@ -1052,10 +1231,13 @@ class PyCurlFileObject():
@@ -1052,10 +1238,13 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
@@ -861,7 +907,7 @@ index e090e90..9526dc1 100644
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
Any attribute not found in _this_ object will be searched for
@@ -1067,6 +1249,12 @@ class PyCurlFileObject():
@@ -1067,6 +1256,12 @@ class PyCurlFileObject():
def _retrieve(self, buf):
try:
@@ -874,7 +920,7 @@ index e090e90..9526dc1 100644
if not self._prog_running:
if self.opts.progress_obj:
size = self.size + self._reget_length
@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
@@ -1079,23 +1274,40 @@ class PyCurlFileObject():
self.opts.progress_obj.update(self._amount_read)
self._amount_read += len(buf)
@@ -921,7 +967,7 @@ index e090e90..9526dc1 100644
elif self.scheme in ['ftp']:
s = None
if buf.startswith('213 '):
@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
@@ -1104,7 +1316,18 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
@@ -941,7 +987,7 @@ index e090e90..9526dc1 100644
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
@@ -1113,8 +1336,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -952,7 +998,7 @@ index e090e90..9526dc1 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
@@ -1127,6 +1352,9 @@ class PyCurlFileObject():
if not opts:
opts = self.opts
@@ -962,7 +1008,7 @@ index e090e90..9526dc1 100644
# defaults we're always going to set
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
@@ -1136,11 +1364,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -985,7 +1031,7 @@ index e090e90..9526dc1 100644
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
@@ -1148,9 +1379,11 @@ class PyCurlFileObject():
@@ -1148,9 +1386,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
@@ -995,12 +1041,12 @@ index e090e90..9526dc1 100644
+        if hasattr(opts, 'timeout'):
+            timeout = int(opts.timeout or 0)
+        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
# ssl options
if self.scheme == 'https':
@@ -1158,13 +1391,16 @@ class PyCurlFileObject():
@@ -1158,13 +1398,16 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@@ -1018,7 +1064,7 @@ index e090e90..9526dc1 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
@@ -1187,28 +1423,28 @@ class PyCurlFileObject():
@@ -1187,28 +1430,26 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -1042,11 +1088,9 @@ index e090e90..9526dc1 100644
+        # proxy
+        if opts.proxy is not None:
+            self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
+            auth = pycurl.HTTPAUTH_ANY
+            if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
+                # BZ 769254: work around a bug in curl < 7.28.0
+                auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
+            self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
+            self.curl_obj.setopt(pycurl.PROXYAUTH,
+                # All but Kerberos. BZ 769254
+                pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
+
+        if opts.username and opts.password:
+            if self.scheme in ('http', 'https'):
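The plain subtraction works because HTTPAUTH_GSSNEGOTIATE is a single bit that is included in the HTTPAUTH_ANY mask, so it is equivalent to masking the bit out. A quick check (assumes a pycurl build that defines the GSS constant):

    import pycurl
    assert (pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE
            == pycurl.HTTPAUTH_ANY & ~pycurl.HTTPAUTH_GSSNEGOTIATE)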
@@ -1065,7 +1109,7 @@ index e090e90..9526dc1 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
@@ -1228,39 +1464,26 @@ class PyCurlFileObject():
@@ -1228,39 +1469,26 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -1111,7 +1155,7 @@ index e090e90..9526dc1 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
@@ -1269,36 +1492,70 @@ class PyCurlFileObject():
@@ -1269,36 +1497,70 @@ class PyCurlFileObject():
# figure out what aborted the pycurl process FIXME
raise KeyboardInterrupt
@@ -1207,7 +1251,7 @@ index e090e90..9526dc1 100644
def _do_open(self):
self.curl_obj = _curl_cache
@@ -1333,7 +1590,11 @@ class PyCurlFileObject():
@@ -1333,7 +1595,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
@@ -1220,7 +1264,7 @@ index e090e90..9526dc1 100644
if rt:
header = range_tuple_to_header(rt)
@@ -1434,21 +1695,46 @@ class PyCurlFileObject():
@@ -1434,21 +1700,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@@ -1274,7 +1318,7 @@ index e090e90..9526dc1 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
@@ -1526,17 +1812,20 @@ class PyCurlFileObject():
@@ -1526,17 +1817,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1300,7 +1344,7 @@ index e090e90..9526dc1 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
@@ -1544,13 +1833,6 @@ class PyCurlFileObject():
@@ -1544,13 +1838,6 @@ class PyCurlFileObject():
return True
return False
@@ -1314,7 +1358,7 @@ index e090e90..9526dc1 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
@@ -1582,9 +1864,21 @@ class PyCurlFileObject():
@@ -1582,9 +1869,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1337,7 +1381,7 @@ index e090e90..9526dc1 100644
#####################################################################
# DEPRECATED FUNCTIONS
@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1443,7 +1487,7 @@ index e090e90..9526dc1 100644
+    # list of options we pass to downloader
+    _options = (
+        'url', 'filename',
+        'timeout', 'close_connection', 'keepalive',
+        'timeout', 'minrate', 'close_connection', 'keepalive',
+        'throttle', 'bandwidth', 'range', 'reget',
+        'user_agent', 'http_headers', 'ftp_headers',
+        'proxy', 'prefix', 'username', 'password',
@@ -1478,7 +1522,7 @@ index e090e90..9526dc1 100644
+            raise KeyboardInterrupt
+        for line in lines:
+            # parse downloader output
+            line = line.split(' ', 5)
+            line = line.split(' ', 6)
+            _id, size = map(int, line[:2])
+            if len(line) == 2:
+                self.running[_id]._progress.update(size)
@@ -1489,7 +1533,9 @@ index e090e90..9526dc1 100644
+                ug_err = None
+                if DEBUG: DEBUG.info('success')
+            else:
+                ug_err = URLGrabError(int(line[4]), line[5])
+                ug_err = URLGrabError(int(line[4]), line[6])
+                if line[5] != '0':
+                    ug_err.code = int(line[5])
+                if DEBUG: DEBUG.info('failure: %s', ug_err)
+            _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
+            ret.append((opts, size, ug_err))
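After this change a completed job is reported by urlgrabber-ext-down as seven space-separated fields: id, size, downloaded bytes, download time, errno, the new HTTP code (0 when absent), and the error string; two-field lines remain progress updates. A sketch of just the framing for a finished-job line (helper name mine):

    from urlgrabber.grabber import URLGrabError

    def parse_done_line(line):
        # '<id> <size> <dlsz> <dltm> OK' on success,
        # '<id> <size> <dlsz> <dltm> <errno> <code> <strerror>' on failure
        part = line.split(' ', 6)
        _id, size, dlsz = int(part[0]), int(part[1]), int(part[2])
        dltm = float(part[3])
        if part[4] == 'OK':
            return _id, size, dlsz, dltm, None
        err = URLGrabError(int(part[4]), part[6])
        if part[5] != '0':
            err.code = int(part[5])  # e.g. 503 from the failing mirror
        return _id, size, dlsz, dltm, err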
@@ -1983,7 +2029,7 @@ index dad410b..7975f1b 100644
def urlopen(self, url, **kwargs):
kw = dict(kwargs)
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
index dd07c6a..077fd99 100644
index dd07c6a..b456a0c 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -133,8 +133,8 @@ class BaseMeter:
@@ -2009,7 +2055,7 @@ index dd07c6a..077fd99 100644
+    return tl.add(' [%-*.*s]' % (blen, blen, bar))
+
+def _term_add_end(tl, osize, size):
+    if osize is not None:
+    if osize: # osize should be None or >0, but that's been broken.
+        if size > osize: # Is ??? better? Really need something to say < vs >.
+            return tl.add(' !!! '), True
+        elif size != osize:
