|
|
|
@ -81,7 +81,7 @@ index 518e512..07881b3 100644
|
|
|
|
|
try:
|
|
|
|
|
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
|
|
|
|
|
new file mode 100755
|
|
|
|
|
index 0000000..3dafb12
|
|
|
|
|
index 0000000..9ea0e70
|
|
|
|
|
--- /dev/null
|
|
|
|
|
+++ b/scripts/urlgrabber-ext-down
|
|
|
|
|
@@ -0,0 +1,75 @@
|
|
|
|
@ -155,7 +155,7 @@ index 0000000..3dafb12
|
|
|
|
|
+ ug_err = 'OK'
|
|
|
|
|
+ except URLGrabError, e:
|
|
|
|
|
+ size = 0
|
|
|
|
|
+ ug_err = '%d %s' % e.args
|
|
|
|
|
+ ug_err = '%d %d %s' % (e.errno, getattr(e, 'code', 0), e.strerror)
|
|
|
|
|
+ write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err)
|
|
|
|
|
+
|
|
|
|
|
+if __name__ == '__main__':
|
|
|
|
@ -190,7 +190,7 @@ index 50c6348..5fb43f9 100644
|
|
|
|
|
|
|
|
|
|
# set to a proftp server only. we're working around a couple of
|
|
|
|
|
diff --git a/test/test_mirror.py b/test/test_mirror.py
|
|
|
|
|
index 70fe069..cb63a41 100644
|
|
|
|
|
index 70fe069..6fdb668 100644
|
|
|
|
|
--- a/test/test_mirror.py
|
|
|
|
|
+++ b/test/test_mirror.py
|
|
|
|
|
@@ -28,7 +28,7 @@ import os
|
|
|
|
@ -220,6 +220,45 @@ index 70fe069..cb63a41 100644
|
|
|
|
|
|
|
|
|
|
def urlgrab(self, url, filename=None, **kwargs):
|
|
|
|
|
self.calls.append( (url, filename) )
|
|
|
|
|
@@ -265,6 +269,38 @@ class ActionTests(TestCase):
|
|
|
|
|
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+class HttpReplyCode(TestCase):
|
|
|
|
|
+ def setUp(self):
|
|
|
|
|
+ def server():
|
|
|
|
|
+ import socket
|
|
|
|
|
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
|
|
|
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
|
|
|
+ s.bind(('localhost', 2000)); s.listen(1)
|
|
|
|
|
+ while 1:
|
|
|
|
|
+ c, a = s.accept()
|
|
|
|
|
+ while not c.recv(4096).endswith('\r\n\r\n'): pass
|
|
|
|
|
+ c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
|
|
|
|
|
+ c.close()
|
|
|
|
|
+ import thread
|
|
|
|
|
+ self.reply = 503, "Busy"
|
|
|
|
|
+ thread.start_new_thread(server, ())
|
|
|
|
|
+
|
|
|
|
|
+ def failure(obj):
|
|
|
|
|
+ self.code = getattr(obj.exception, 'code', None)
|
|
|
|
|
+ return {}
|
|
|
|
|
+ self.g = URLGrabber()
|
|
|
|
|
+ self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure)
|
|
|
|
|
+
|
|
|
|
|
+ def test_grab(self):
|
|
|
|
|
+ self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
|
|
|
|
|
+ self.assertEquals(self.code, 503); del self.code
|
|
|
|
|
+
|
|
|
|
|
+ err = []
|
|
|
|
|
+ self.mg.urlgrab('foo', async = True, failfunc = err.append)
|
|
|
|
|
+ urlgrabber.grabber.parallel_wait()
|
|
|
|
|
+ self.assertEquals([e.exception.errno for e in err], [256])
|
|
|
|
|
+ self.assertEquals(self.code, 503); del self.code
|
|
|
|
|
+
|
|
|
|
|
def suite():
|
|
|
|
|
tl = TestLoader()
|
|
|
|
|
return tl.loadTestsFromModule(sys.modules[__name__])
|
|
|
|
|
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
|
|
|
|
|
index 3e5f3b7..8eeaeda 100644
|
|
|
|
|
--- a/urlgrabber/byterange.py
|
|
|
|
@ -275,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644
|
|
|
|
|
return (fb,lb)
|
|
|
|
|
|
|
|
|
|
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
|
|
|
|
index e090e90..9526dc1 100644
|
|
|
|
|
index e090e90..37d1297 100644
|
|
|
|
|
--- a/urlgrabber/grabber.py
|
|
|
|
|
+++ b/urlgrabber/grabber.py
|
|
|
|
|
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
|
|
|
|
@ -1018,7 +1057,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
if opts.ssl_cert_type:
|
|
|
|
|
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
|
|
|
|
|
if opts.ssl_key_pass:
|
|
|
|
|
@@ -1187,28 +1423,28 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1187,28 +1423,26 @@ class PyCurlFileObject():
|
|
|
|
|
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
|
|
|
|
|
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
|
|
|
|
|
|
|
|
|
@ -1042,11 +1081,9 @@ index e090e90..9526dc1 100644
|
|
|
|
|
+ # proxy
|
|
|
|
|
+ if opts.proxy is not None:
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
|
|
|
|
|
+ auth = pycurl.HTTPAUTH_ANY
|
|
|
|
|
+ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
|
|
|
|
|
+ # BZ 769254: work around a bug in curl < 7.28.0
|
|
|
|
|
+ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
|
|
|
|
|
+ self.curl_obj.setopt(pycurl.PROXYAUTH,
|
|
|
|
|
+ # All but Kerberos. BZ 769254
|
|
|
|
|
+ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
|
|
|
|
|
+
|
|
|
|
|
+ if opts.username and opts.password:
|
|
|
|
|
+ if self.scheme in ('http', 'https'):
|
|
|
|
@ -1065,7 +1102,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
|
|
|
|
|
# our url
|
|
|
|
|
self.curl_obj.setopt(pycurl.URL, self.url)
|
|
|
|
|
@@ -1228,39 +1464,26 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1228,39 +1462,26 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
code = self.http_code
|
|
|
|
|
errcode = e.args[0]
|
|
|
|
@ -1111,7 +1148,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
# this is probably wrong but ultimately this is what happens
|
|
|
|
|
# we have a legit http code and a pycurl 'writer failed' code
|
|
|
|
|
# which almost always means something aborted it from outside
|
|
|
|
|
@@ -1269,36 +1492,70 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1269,36 +1490,70 @@ class PyCurlFileObject():
|
|
|
|
|
# figure out what aborted the pycurl process FIXME
|
|
|
|
|
raise KeyboardInterrupt
|
|
|
|
|
|
|
|
|
@ -1207,7 +1244,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
|
|
|
|
|
def _do_open(self):
|
|
|
|
|
self.curl_obj = _curl_cache
|
|
|
|
|
@@ -1333,7 +1590,11 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1333,7 +1588,11 @@ class PyCurlFileObject():
|
|
|
|
|
|
|
|
|
|
if self.opts.range:
|
|
|
|
|
rt = self.opts.range
|
|
|
|
@ -1220,7 +1257,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
|
|
|
|
|
if rt:
|
|
|
|
|
header = range_tuple_to_header(rt)
|
|
|
|
|
@@ -1434,21 +1695,46 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1434,21 +1693,46 @@ class PyCurlFileObject():
|
|
|
|
|
#fh, self._temp_name = mkstemp()
|
|
|
|
|
#self.fo = open(self._temp_name, 'wb')
|
|
|
|
|
|
|
|
|
@ -1274,7 +1311,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
else:
|
|
|
|
|
#self.fo = open(self._temp_name, 'r')
|
|
|
|
|
self.fo.seek(0)
|
|
|
|
|
@@ -1526,17 +1812,20 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1526,17 +1810,20 @@ class PyCurlFileObject():
|
|
|
|
|
if self._prog_running:
|
|
|
|
|
downloaded += self._reget_length
|
|
|
|
|
self.opts.progress_obj.update(downloaded)
|
|
|
|
@ -1300,7 +1337,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
|
|
|
|
|
msg = _("Downloaded more than max size for %s: %s > %s") \
|
|
|
|
|
% (self.url, cur, max_size)
|
|
|
|
|
@@ -1544,13 +1833,6 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1544,13 +1831,6 @@ class PyCurlFileObject():
|
|
|
|
|
return True
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
@ -1314,7 +1351,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
def read(self, amt=None):
|
|
|
|
|
self._fill_buffer(amt)
|
|
|
|
|
if amt is None:
|
|
|
|
|
@@ -1582,9 +1864,21 @@ class PyCurlFileObject():
|
|
|
|
|
@@ -1582,9 +1862,21 @@ class PyCurlFileObject():
|
|
|
|
|
self.opts.progress_obj.end(self._amount_read)
|
|
|
|
|
self.fo.close()
|
|
|
|
|
|
|
|
|
@ -1337,7 +1374,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
|
# DEPRECATED FUNCTIONS
|
|
|
|
|
@@ -1621,6 +1915,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
|
|
@ -1478,7 +1515,7 @@ index e090e90..9526dc1 100644
|
|
|
|
|
+ raise KeyboardInterrupt
|
|
|
|
|
+ for line in lines:
|
|
|
|
|
+ # parse downloader output
|
|
|
|
|
+ line = line.split(' ', 5)
|
|
|
|
|
+ line = line.split(' ', 6)
|
|
|
|
|
+ _id, size = map(int, line[:2])
|
|
|
|
|
+ if len(line) == 2:
|
|
|
|
|
+ self.running[_id]._progress.update(size)
|
|
|
|
@ -1489,7 +1526,9 @@ index e090e90..9526dc1 100644
|
|
|
|
|
+ ug_err = None
|
|
|
|
|
+ if DEBUG: DEBUG.info('success')
|
|
|
|
|
+ else:
|
|
|
|
|
+ ug_err = URLGrabError(int(line[4]), line[5])
|
|
|
|
|
+ ug_err = URLGrabError(int(line[4]), line[6])
|
|
|
|
|
+ if line[5] != '0':
|
|
|
|
|
+ ug_err.code = int(line[5])
|
|
|
|
|
+ if DEBUG: DEBUG.info('failure: %s', ug_err)
|
|
|
|
|
+ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
|
|
|
|
|
+ ret.append((opts, size, ug_err))
|
|
|
|
|