From 198714bffb723ca5b36291046c12ab1ec78b2fb8 Mon Sep 17 00:00:00 2001 From: Valentina Mukhamedzhanova Date: Fri, 29 Aug 2014 16:42:23 +0200 Subject: [PATCH] Don't set speed=0 on a new mirror that 404'd. BZ 1051554 - Support both Python 2 and 3. BZ 985288 --- BZ-1051554-speed-on-404-mirror.patch | 18 + port-tests-to-python3.patch | 658 +++++++++++++++ port-to-python3.patch | 1175 ++++++++++++++++++++++++++ python-urlgrabber.spec | 16 +- 4 files changed, 1864 insertions(+), 3 deletions(-) create mode 100644 BZ-1051554-speed-on-404-mirror.patch create mode 100644 port-tests-to-python3.patch create mode 100644 port-to-python3.patch diff --git a/BZ-1051554-speed-on-404-mirror.patch b/BZ-1051554-speed-on-404-mirror.patch new file mode 100644 index 0000000..b587c3d --- /dev/null +++ b/BZ-1051554-speed-on-404-mirror.patch @@ -0,0 +1,18 @@ +commit fbc995805e9d860366c67819effc3bc7d5d8a8b2 +Author: Zdenek Pavlas +Date: Mon Jan 13 13:20:28 2014 +0100 + + Don't set speed=0 on a new mirror that 404'd. BZ 1051554 + +diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py +index ef18d6a..04f1179 100644 +--- a/urlgrabber/grabber.py ++++ b/urlgrabber/grabber.py +@@ -2418,6 +2418,7 @@ class _TH: + speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2) + fail = 0 + elif getattr(ug_err, 'code', None) == 404: ++ if not ts: return # 1st update, avoid speed=0 + fail = 0 # alive, at least + else: + fail += 1 # seems dead diff --git a/port-tests-to-python3.patch b/port-tests-to-python3.patch new file mode 100644 index 0000000..61ce736 --- /dev/null +++ b/port-tests-to-python3.patch @@ -0,0 +1,658 @@ +commit 8560a386c3ea1e868a8e294c1e318a6ee5319580 +Author: Tomas Radej +Date: Wed Aug 20 13:32:32 2014 +0200 + + Ported test suite + +diff --git a/test/grabberperf.py b/test/grabberperf.py +index 820da2c..d9142fa 100644 +--- a/test/grabberperf.py ++++ b/test/grabberperf.py +@@ -21,11 +21,15 @@ + + import sys + import os +-from os.path import dirname, join as joinpath + import tempfile + import time ++import six ++ ++# Hack for Python 3 ++sys.path.insert(0, os.path.expandvars(os.path.abspath('..'))) + +-import urlgrabber.grabber as grabber ++from os.path import dirname, join as joinpath ++from urlgrabber import grabber + from urlgrabber.grabber import URLGrabber, urlgrab, urlopen, urlread + from urlgrabber.progress import text_progress_meter + +@@ -48,7 +52,7 @@ def main(): + os.unlink(tempdst) + + def setuptemp(size): +- if DEBUG: print 'writing %d KB to temporary file (%s).' % (size / 1024, tempsrc) ++ if DEBUG: print('writing %d KB to temporary file (%s).' % (size / 1024, tempsrc)) + file = open(tempsrc, 'w', 1024) + chars = '0123456789' + for i in range(size): +@@ -65,9 +69,9 @@ def speedtest(size): + + try: + from urlgrabber.progress import text_progress_meter +- except ImportError, e: ++ except ImportError as e: + tpm = None +- print 'not using progress meter' ++ print('not using progress meter') + else: + tpm = text_progress_meter(fo=open('/dev/null', 'w')) + +@@ -83,15 +87,15 @@ def speedtest(size): + # module. + + # get it nicely cached before we start comparing +- if DEBUG: print 'pre-caching' ++ if DEBUG: print('pre-caching') + for i in range(100): + urlgrab(tempsrc, tempdst, copy_local=1, throttle=None, proxies=proxies) + +- if DEBUG: print 'running speed test.' ++ if DEBUG: print('running speed test.') + reps = 500 + for i in range(reps): + if DEBUG: +- print '\r%4i/%-4i' % (i+1, reps), ++ six.print_('\r%4i/%-4i' % (i+1, reps), end=' ') + sys.stdout.flush() + t = time.time() + urlgrab(tempsrc, tempdst, +@@ -111,14 +115,14 @@ def speedtest(size): + while 1: + s = in_fo.read(1024 * 8) + if not s: break +- out_fo.write(s) ++ out_fo.write(s if not six.PY3 else s.encode('utf-8')) + in_fo.close() + out_fo.close() + none_times.append(1000 * (time.time() - t)) + +- if DEBUG: print '\r' ++ if DEBUG: print('\r') + +- print "%d KB Results:" % (size / 1024) ++ print("%d KB Results:" % (size / 1024)) + print_result('full', full_times) + print_result('raw', raw_times) + print_result('none', none_times) +@@ -131,7 +135,7 @@ def print_result(label, result_list): + for i in result_list: mean += i + mean = mean/len(result_list) + median = result_list[int(len(result_list)/2)] +- print format % (label, mean, median, result_list[0], result_list[-1]) ++ print(format % (label, mean, median, result_list[0], result_list[-1])) + + if __name__ == '__main__': + main() +diff --git a/test/munittest.py b/test/munittest.py +index 16a61ae..7e7969e 100644 +--- a/test/munittest.py ++++ b/test/munittest.py +@@ -103,9 +103,9 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + import time + import sys + import traceback +-import string + import os + import types ++import collections + + ############################################################################## + # Exported classes and functions +@@ -190,7 +190,7 @@ class TestResult: + + def _exc_info_to_string(self, err): + """Converts a sys.exc_info()-style tuple of values into a string.""" +- return string.join(traceback.format_exception(*err), '') ++ return ''.join(traceback.format_exception(*err)) + + def __repr__(self): + return "<%s run=%i errors=%i failures=%i>" % \ +@@ -251,8 +251,8 @@ class TestCase: + testMethod = getattr(self, methodName) + self._testMethodDoc = testMethod.__doc__ + except AttributeError: +- raise ValueError, "no such test method in %s: %s" % \ +- (self.__class__, methodName) ++ raise ValueError("no such test method in %s: %s" % \ ++ (self.__class__, methodName)) + + def setUp(self): + "Hook method for setting up the test fixture before exercising it." +@@ -276,7 +276,7 @@ class TestCase: + the specified test method's docstring. + """ + doc = self._testMethodDoc +- return doc and string.strip(string.split(doc, "\n")[0]) or None ++ return doc and doc.split("\n")[0].strip() or None + + def id(self): + return "%s.%s" % (_strclass(self.__class__), self._testMethodName) +@@ -361,15 +361,15 @@ class TestCase: + + def fail(self, msg=None): + """Fail immediately, with the given message.""" +- raise self.failureException, msg ++ raise self.failureException(msg) + + def failIf(self, expr, msg=None): + "Fail the test if the expression is true." +- if expr: raise self.failureException, msg ++ if expr: raise self.failureException(msg) + + def failUnless(self, expr, msg=None): + """Fail the test unless the expression is true.""" +- if not expr: raise self.failureException, msg ++ if not expr: raise self.failureException(msg) + + def failUnlessRaises(self, excClass, callableObj, *args, **kwargs): + """Fail unless an exception of class excClass is thrown +@@ -386,23 +386,21 @@ class TestCase: + else: + if hasattr(excClass,'__name__'): excName = excClass.__name__ + else: excName = str(excClass) +- raise self.failureException, excName ++ raise self.failureException(excName) + + def failUnlessEqual(self, first, second, msg=None): + """Fail if the two objects are unequal as determined by the '==' + operator. + """ + if not first == second: +- raise self.failureException, \ +- (msg or '%s != %s' % (`first`, `second`)) ++ raise self.failureException(msg or '%s != %s' % (repr(first), repr(second))) + + def failIfEqual(self, first, second, msg=None): + """Fail if the two objects are equal as determined by the '==' + operator. + """ + if first == second: +- raise self.failureException, \ +- (msg or '%s == %s' % (`first`, `second`)) ++ raise self.failureException(msg or '%s == %s' % (repr(first), repr(second))) + + def failUnlessAlmostEqual(self, first, second, places=7, msg=None): + """Fail if the two objects are unequal as determined by their +@@ -413,8 +411,7 @@ class TestCase: + as significant digits (measured from the most significant digit). + """ + if round(second-first, places) != 0: +- raise self.failureException, \ +- (msg or '%s != %s within %s places' % (`first`, `second`, `places` )) ++ raise self.failureException(msg or '%s != %s within %s places' % (repr(first), repr(second), repr(places) )) + + def failIfAlmostEqual(self, first, second, places=7, msg=None): + """Fail if the two objects are equal as determined by their +@@ -425,8 +422,7 @@ class TestCase: + as significant digits (measured from the most significant digit). + """ + if round(second-first, places) == 0: +- raise self.failureException, \ +- (msg or '%s == %s within %s places' % (`first`, `second`, `places`)) ++ raise self.failureException(msg or '%s == %s within %s places' % (repr(first), repr(second), repr(places))) + + assertEqual = assertEquals = failUnlessEqual + +@@ -442,15 +438,15 @@ class TestCase: + + def skip(self, msg=None): + """Skip the test""" +- raise self.skipException, msg ++ raise self.skipException(msg) + + def skipIf(self, expr, msg=None): + "Skip the test if the expression is true." +- if expr: raise self.skipException, msg ++ if expr: raise self.skipException(msg) + + def skipUnless(self, expr, msg=None): + """Skip the test unless the expression is true.""" +- if not expr: raise self.skipException, msg ++ if not expr: raise self.skipException(msg) + + + +@@ -554,7 +550,7 @@ class FunctionTestCase(TestCase): + def shortDescription(self): + if self._description is not None: return self._description + doc = self._testFunc.__doc__ +- return doc and string.strip(string.split(doc, "\n")[0]) or None ++ return doc and doc.split("\n")[0].strip() or None + + + +@@ -567,13 +563,12 @@ class TestLoader: + criteria and returning them wrapped in a Test + """ + testMethodPrefix = 'test' +- sortTestMethodsUsing = cmp + suiteClass = TestSuite + + def loadTestsFromTestCase(self, testCaseClass): + """Return a suite of all tests cases contained in testCaseClass""" + name_list = self.getTestCaseNames(testCaseClass) +- instance_list = map(testCaseClass, name_list) ++ instance_list = list(map(testCaseClass, name_list)) + description = getattr(testCaseClass, '__doc__') \ + or testCaseClass.__name__ + description = (description.splitlines()[0]).strip() +@@ -585,7 +580,7 @@ class TestLoader: + tests = [] + for name in dir(module): + obj = getattr(module, name) +- if (isinstance(obj, (type, types.ClassType)) and ++ if (isinstance(obj, type) and + issubclass(obj, TestCase) and + not obj in [TestCase, FunctionTestCase]): + tests.append(self.loadTestsFromTestCase(obj)) +@@ -603,15 +598,15 @@ class TestLoader: + + The method optionally resolves the names relative to a given module. + """ +- parts = string.split(name, '.') ++ parts = name.split('.') + if module is None: + if not parts: +- raise ValueError, "incomplete test name: %s" % name ++ raise ValueError("incomplete test name: %s" % name) + else: + parts_copy = parts[:] + while parts_copy: + try: +- module = __import__(string.join(parts_copy,'.')) ++ module = __import__('.'.join(parts_copy,)) + break + except ImportError: + del parts_copy[-1] +@@ -624,20 +619,19 @@ class TestLoader: + import unittest + if type(obj) == types.ModuleType: + return self.loadTestsFromModule(obj) +- elif (isinstance(obj, (type, types.ClassType)) and ++ elif (isinstance(obj, type) and + issubclass(obj, unittest.TestCase)): + return self.loadTestsFromTestCase(obj) + elif type(obj) == types.UnboundMethodType: +- return obj.im_class(obj.__name__) +- elif callable(obj): ++ return obj.__self__.__class__(obj.__name__) ++ elif isinstance(obj, collections.Callable): + test = obj() + if not isinstance(test, unittest.TestCase) and \ + not isinstance(test, unittest.TestSuite): +- raise ValueError, \ +- "calling %s returned %s, not a test" % (obj,test) ++ raise ValueError("calling %s returned %s, not a test" % (obj,test)) + return test + else: +- raise ValueError, "don't know how to make test from: %s" % obj ++ raise ValueError("don't know how to make test from: %s" % obj) + + def loadTestsFromNames(self, names, module=None): + """Return a suite of all tests cases found using the given sequence +@@ -651,14 +645,13 @@ class TestLoader: + def getTestCaseNames(self, testCaseClass): + """Return a sorted sequence of method names found within testCaseClass + """ +- testFnNames = filter(lambda n,p=self.testMethodPrefix: n[:len(p)] == p, +- dir(testCaseClass)) ++ testFnNames = list(filter(lambda n,p=self.testMethodPrefix: n[:len(p)] == p, ++ dir(testCaseClass))) + for baseclass in testCaseClass.__bases__: + for testFnName in self.getTestCaseNames(baseclass): + if testFnName not in testFnNames: # handle overridden methods + testFnNames.append(testFnName) +- if self.sortTestMethodsUsing: +- testFnNames.sort(self.sortTestMethodsUsing) ++ testFnNames.sort() + return testFnNames + + +@@ -670,21 +663,20 @@ defaultTestLoader = TestLoader() + # Patches for old functions: these functions should be considered obsolete + ############################################################################## + +-def _makeLoader(prefix, sortUsing, suiteClass=None): ++def _makeLoader(prefix, suiteClass=None): + loader = TestLoader() +- loader.sortTestMethodsUsing = sortUsing + loader.testMethodPrefix = prefix + if suiteClass: loader.suiteClass = suiteClass + return loader + +-def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp): +- return _makeLoader(prefix, sortUsing).getTestCaseNames(testCaseClass) ++def getTestCaseNames(testCaseClass, prefix): ++ return _makeLoader(prefix).getTestCaseNames(testCaseClass) + +-def makeSuite(testCaseClass, prefix='test', sortUsing=cmp, suiteClass=TestSuite): +- return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromTestCase(testCaseClass) ++def makeSuite(testCaseClass, prefix='test',suiteClass=TestSuite): ++ return _makeLoader(prefix, suiteClass).loadTestsFromTestCase(testCaseClass) + +-def findTestCases(module, prefix='test', sortUsing=cmp, suiteClass=TestSuite): +- return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromModule(module) ++def findTestCases(module, prefix='test',suiteClass=TestSuite): ++ return _makeLoader(prefix, suiteClass).loadTestsFromModule(module) + + + ############################################################################## +@@ -825,8 +817,8 @@ class TextTestRunner: + self.stream.writeln() + if not result.wasSuccessful(): + self.stream.write("FAILED (") +- failed, errored, skipped = map(len, \ +- (result.failures, result.errors, result.skipped)) ++ failed, errored, skipped = list(map(len, \ ++ (result.failures, result.errors, result.skipped))) + if failed: + self.stream.write("failures=%d" % failed) + if errored: +@@ -871,7 +863,7 @@ Examples: + argv=None, testRunner=None, testLoader=defaultTestLoader): + if type(module) == type(''): + self.module = __import__(module) +- for part in string.split(module,'.')[1:]: ++ for part in module.split('.')[1:]: + self.module = getattr(self.module, part) + else: + self.module = module +@@ -886,8 +878,8 @@ Examples: + self.runTests() + + def usageExit(self, msg=None): +- if msg: print msg +- print self.USAGE % self.__dict__ ++ if msg: print(msg) ++ print(self.USAGE % self.__dict__) + sys.exit(2) + + def parseArgs(self, argv): +@@ -910,7 +902,7 @@ Examples: + else: + self.testNames = (self.defaultTest,) + self.createTests() +- except getopt.error, msg: ++ except getopt.error as msg: + self.usageExit(msg) + + def createTests(self): +diff --git a/test/runtests.py b/test/runtests.py +index c48bd1d..78a5974 100644 +--- a/test/runtests.py ++++ b/test/runtests.py +@@ -54,7 +54,7 @@ def parse_args(): + return (descriptions,verbosity) + + def usage(): +- print __doc__ ++ print(__doc__) + + if __name__ == '__main__': + main() +diff --git a/test/test_byterange.py b/test/test_byterange.py +index 0f75807..0863be8 100644 +--- a/test/test_byterange.py ++++ b/test/test_byterange.py +@@ -24,8 +24,11 @@ + # $Id: test_byterange.py,v 1.6 2004/03/31 17:02:00 mstenner Exp $ + + import sys ++import six + +-from cStringIO import StringIO ++from io import StringIO ++ ++import urlgrabber + from urlgrabber.byterange import RangeableFileObject + + from base_test_code import * +@@ -37,7 +40,7 @@ class RangeableFileObjectTestCase(TestCase): + # 0 1 2 3 4 5 6 7 8 9 + # 0123456789012345678901234567890123456789012345678901234567 890123456789012345678901234567890 + self.test = 'Why cannot we write the entire 24 volumes of Encyclopaedia\nBrittanica on the head of a pin?\n' +- self.fo = StringIO(self.test) ++ self.fo = StringIO(unicode(self.test) if not six.PY3 else self.test) + self.rfo = RangeableFileObject(self.fo, (20,69)) + + def tearDown(self): +@@ -61,7 +64,8 @@ class RangeableFileObjectTestCase(TestCase): + + def test_readall(self): + """RangeableFileObject.read(): to end of file.""" +- rfo = RangeableFileObject(StringIO(self.test),(11,)) ++ text_compat = unicode(self.test) if not six.PY3 else self.test ++ rfo = RangeableFileObject(StringIO(text_compat),(11,)) + self.assertEquals(self.test[11:],rfo.read()) + + def test_readline(self): +diff --git a/test/test_grabber.py b/test/test_grabber.py +index 8e45d25..bd36d66 100644 +--- a/test/test_grabber.py ++++ b/test/test_grabber.py +@@ -24,11 +24,13 @@ + # $Id: test_grabber.py,v 1.31 2006/12/08 00:14:16 mstenner Exp $ + + import sys ++import six + import os +-import string, tempfile, random, cStringIO, os +-import urllib2 ++import tempfile, random, os ++from six.moves import urllib + import socket + ++from io import StringIO + from base_test_code import * + + import urlgrabber +@@ -41,12 +43,12 @@ class FileObjectTests(TestCase): + + def setUp(self): + self.filename = tempfile.mktemp() +- fo = file(self.filename, 'wb') +- fo.write(reference_data) ++ fo = open(self.filename, 'wb') ++ fo.write(reference_data.encode('utf-8')) + fo.close() + +- self.fo_input = cStringIO.StringIO(reference_data) +- self.fo_output = cStringIO.StringIO() ++ self.fo_input = StringIO(unicode(reference_data) if not six.PY3 else reference_data) ++ self.fo_output = StringIO() + (url, parts) = grabber.default_grabber.opts.urlparser.parse( + self.filename, grabber.default_grabber.opts) + self.wrapper = grabber.PyCurlFileObject( +@@ -73,7 +75,7 @@ class FileObjectTests(TestCase): + def test_readlines(self): + "PyCurlFileObject .readlines() method" + li = self.wrapper.readlines() +- self.fo_output.write(string.join(li, '')) ++ self.fo_output.write(''.join(li)) + self.assert_(reference_data == self.fo_output.getvalue()) + + def test_smallread(self): +@@ -90,7 +92,7 @@ class HTTPTests(TestCase): + filename = tempfile.mktemp() + grabber.urlgrab(ref_http, filename) + +- fo = file(filename, 'rb') ++ fo = open(filename, 'rb' if not six.PY3 else 'r') + contents = fo.read() + fo.close() + +@@ -136,7 +138,7 @@ class URLGrabberTestCase(TestCase): + + def setUp(self): + +- self.meter = text_progress_meter( fo=cStringIO.StringIO() ) ++ self.meter = text_progress_meter( fo=StringIO() ) + pass + + def tearDown(self): +@@ -149,7 +151,7 @@ class URLGrabberTestCase(TestCase): + values into the URLGrabber constructor and checks that + they've been set properly. + """ +- opener = urllib2.OpenerDirector() ++ opener = urllib.request.OpenerDirector() + g = URLGrabber( progress_obj=self.meter, + throttle=0.9, + bandwidth=20, +@@ -225,13 +227,13 @@ class URLParserTestCase(TestCase): + self.assertEquals(parts, urllist[2]) + else: + if url == urllist[1] and parts == urllist[2]: +- print 'OK: %s' % urllist[0] ++ print('OK: %s' % urllist[0]) + else: +- print 'ERROR: %s' % urllist[0] +- print ' ' + urllist[1] +- print ' ' + url +- print ' ' + urllist[2] +- print ' ' + parts ++ print('ERROR: %s' % urllist[0]) ++ print(' ' + urllist[1]) ++ print(' ' + url) ++ print(' ' + urllist[2]) ++ print(' ' + parts) + + + url_tests_all = ( +@@ -380,7 +382,7 @@ class CheckfuncTestCase(TestCase): + + if hasattr(obj, 'filename'): + # we used urlgrab +- fo = file(obj.filename) ++ fo = open(obj.filename) + data = fo.read() + fo.close() + else: +@@ -447,12 +449,12 @@ class RegetTestBase: + except: pass + + def _make_half_zero_file(self): +- fo = file(self.filename, 'wb') +- fo.write('0'*self.hl) ++ fo = open(self.filename, 'wb' if not six.PY3 else 'w') ++ fo.write('0'*int(self.hl)) + fo.close() + + def _read_file(self): +- fo = file(self.filename, 'rb') ++ fo = open(self.filename, 'rb' if not six.PY3 else 'r') + data = fo.read() + fo.close() + return data +@@ -470,7 +472,7 @@ class FTPRegetTests(RegetTestBase, TestCase): + # this tests to see if the server is available. If it's not, + # then these tests will be skipped + try: +- fo = urllib2.urlopen(self.url).close() ++ fo = urllib.request.urlopen(self.url).close() + except IOError: + self.skip() + +@@ -480,8 +482,8 @@ class FTPRegetTests(RegetTestBase, TestCase): + self.grabber.urlgrab(self.url, self.filename, reget='simple') + data = self._read_file() + +- self.assertEquals(data[:self.hl], '0'*self.hl) +- self.assertEquals(data[self.hl:], self.ref[self.hl:]) ++ self.assertEquals(data[:int(self.hl)], '0'*int(self.hl)) ++ self.assertEquals(data[int(self.hl):], self.ref[int(self.hl):]) + + class HTTPRegetTests(FTPRegetTests): + def setUp(self): +@@ -498,8 +500,8 @@ class HTTPRegetTests(FTPRegetTests): + self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp') + data = self._read_file() + +- self.assertEquals(data[:self.hl], '0'*self.hl) +- self.assertEquals(data[self.hl:], self.ref[self.hl:]) ++ self.assertEquals(data[:int(self.hl)], '0'*int(self.hl)) ++ self.assertEquals(data[int(self.hl):], self.ref[int(self.hl):]) + except NotImplementedError: + self.skip() + +@@ -521,7 +523,7 @@ class FileRegetTests(HTTPRegetTests): + def setUp(self): + self.ref = short_reference_data + tmp = tempfile.mktemp() +- tmpfo = file(tmp, 'wb') ++ tmpfo = open(tmp, 'wb' if not six.PY3 else 'w') + tmpfo.write(self.ref) + tmpfo.close() + self.tmp = tmp +@@ -545,7 +547,7 @@ class ProFTPDSucksTests(TestCase): + def setUp(self): + self.url = ref_proftp + try: +- fo = urllib2.urlopen(self.url).close() ++ fo = urllib.request.urlopen(self.url).close() + except IOError: + self.skip() + +@@ -592,7 +594,7 @@ class ProxyFTPAuthTests(ProxyHTTPAuthTests): + if not self.have_proxy(): + self.skip() + try: +- fo = urllib2.urlopen(self.url).close() ++ fo = urllib.request.urlopen(self.url).close() + except IOError: + self.skip() + self.g = URLGrabber() +diff --git a/test/test_mirror.py b/test/test_mirror.py +index 7f493d0..c46cd33 100644 +--- a/test/test_mirror.py ++++ b/test/test_mirror.py +@@ -24,8 +24,9 @@ + # $Id: test_mirror.py,v 1.12 2005/10/22 21:57:27 mstenner Exp $ + + import sys ++import six + import os +-import string, tempfile, random, cStringIO, os ++import string, tempfile, random, os + + import urlgrabber.grabber + from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions +@@ -268,7 +269,8 @@ class ActionTests(TestCase): + self.assertEquals(self.g.calls, expected_calls) + self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) + +-import thread, socket ++from six.moves import _thread as thread ++import socket + LOCALPORT = 'localhost', 2000 + + class HttpReplyCode(TestCase): +@@ -282,11 +284,14 @@ class HttpReplyCode(TestCase): + while 1: + c, a = s.accept() + if self.exit: c.close(); break +- while not c.recv(4096).endswith('\r\n\r\n'): pass +- c.sendall('HTTP/1.1 %d %s\r\n' % self.reply) ++ ending_compat = '\r\n\r\n' if not six.PY3 else b'\r\n\r\n' ++ while not c.recv(4096).endswith(ending_compat): pass ++ http_compat = 'HTTP/1.1 %d %s\r\n' % self.reply ++ c.sendall(http_compat if not six.PY3 else http_compat.encode('utf-8')) + if self.content is not None: +- c.sendall('Content-Length: %d\r\n\r\n' % len(self.content)) +- c.sendall(self.content) ++ cont_length_compat = 'Content-Length: %d\r\n\r\n' % len(self.content) ++ c.sendall(cont_length_compat if not six.PY3 else cont_length_compat.encode('utf-8')) ++ c.sendall(self.content if not six.PY3 else self.content.encode('utf-8')) + c.close() + s.close() + self.exit = False diff --git a/port-to-python3.patch b/port-to-python3.patch new file mode 100644 index 0000000..41b6742 --- /dev/null +++ b/port-to-python3.patch @@ -0,0 +1,1175 @@ +commit 7d6b90e17d333535549e2d3ec1cf41845a9b876f +Author: Tomas Radej +Date: Wed Aug 20 13:32:18 2014 +0200 + + Ported main code + +diff --git a/urlgrabber/__init__.py b/urlgrabber/__init__.py +index b3047b0..636849c 100644 +--- a/urlgrabber/__init__.py ++++ b/urlgrabber/__init__.py +@@ -52,4 +52,4 @@ __author__ = 'Michael D. Stenner , ' \ + 'Zdenek Pavlas ' + __url__ = 'http://urlgrabber.baseurl.org/' + +-from grabber import urlgrab, urlopen, urlread ++from urlgrabber.grabber import urlgrab, urlopen, urlread +diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py +index 5efa160..ffaed8e 100644 +--- a/urlgrabber/byterange.py ++++ b/urlgrabber/byterange.py +@@ -18,24 +18,22 @@ + # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko + + ++import email ++import mimetypes + import os ++import six + import stat +-import urllib +-import urllib2 +-import rfc822 ++from six.moves import urllib + + DEBUG = None + +-try: +- from cStringIO import StringIO +-except ImportError, msg: +- from StringIO import StringIO ++from io import StringIO + + class RangeError(IOError): + """Error raised when an unsatisfiable range is requested.""" + pass + +-class HTTPRangeHandler(urllib2.BaseHandler): ++class HTTPRangeHandler(urllib.request.BaseHandler): + """Handler that enables HTTP Range headers. + + This was extremely simple. The Range header is a HTTP feature to +@@ -120,7 +118,7 @@ class RangeableFileObject: + in self.fo. This includes methods.""" + if hasattr(self.fo, name): + return getattr(self.fo, name) +- raise AttributeError, name ++ raise AttributeError(name) + + def tell(self): + """Return the position within the range. +@@ -211,37 +209,36 @@ class RangeableFileObject: + raise RangeError(9, 'Requested Range Not Satisfiable') + pos+= bufsize + +-class FileRangeHandler(urllib2.FileHandler): ++class FileRangeHandler(urllib.request.FileHandler): + """FileHandler subclass that adds Range support. + This class handles Range headers exactly like an HTTP + server would. + """ + def open_local_file(self, req): +- import mimetypes +- import mimetools + host = req.get_host() + file = req.get_selector() +- localfile = urllib.url2pathname(file) ++ localfile = urllib.request.url2pathname(file) + stats = os.stat(localfile) + size = stats[stat.ST_SIZE] +- modified = rfc822.formatdate(stats[stat.ST_MTIME]) ++ modified = email.utils.formatdate(stats[stat.ST_MTIME]) + mtype = mimetypes.guess_type(file)[0] + if host: +- host, port = urllib.splitport(host) ++ host, port = urllib.parse.splitport(host) + if port or socket.gethostbyname(host) not in self.get_names(): +- raise urllib2.URLError('file not on local host') ++ raise urllib.error.URLError('file not on local host') + fo = open(localfile,'rb') + brange = req.headers.get('Range',None) + brange = range_header_to_tuple(brange) + assert brange != () + if brange: + (fb,lb) = brange +- if lb == '': lb = size ++ if lb == '': ++ lb = size + if fb < 0 or fb > size or lb > size: + raise RangeError(9, 'Requested Range Not Satisfiable') + size = (lb - fb) + fo = RangeableFileObject(fo, (fb,lb)) +- headers = mimetools.Message(StringIO( ++ headers = email.message.Message(StringIO( + 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified))) + return urllib.addinfourl(fo, headers, 'file:'+file) +@@ -254,42 +251,39 @@ class FileRangeHandler(urllib2.FileHandler): + # follows: + # -- range support modifications start/end here + +-from urllib import splitport, splituser, splitpasswd, splitattr, \ +- unquote, addclosehook, addinfourl + import ftplib + import socket + import sys +-import mimetypes +-import mimetools ++from six.moves.urllib.parse import urlparse, unquote ++ ++# Very old functions and classes, undocumented in current Python releases ++if six.PY3: ++ from urllib.request import splitattr ++ from urllib.response import addinfourl ++else: ++ from urllib import splitattr ++ from urllib import addinfourl + +-class FTPRangeHandler(urllib2.FTPHandler): ++ ++class FTPRangeHandler(urllib.request.FTPHandler): + def ftp_open(self, req): + host = req.get_host() + if not host: +- raise IOError, ('ftp error', 'no host given') +- host, port = splitport(host) +- if port is None: +- port = ftplib.FTP_PORT +- else: +- port = int(port) ++ raise IOError('ftp error', 'no host given') + +- # username/password handling +- user, host = splituser(host) +- if user: +- user, passwd = splitpasswd(user) +- else: +- passwd = None ++ parsed = urlparse(host) ++ port = parsed.port or ftplib.FTP_PORT ++ user = unquote(parsed.username or '') ++ passwd = unquote(parsed.passwd or '') + host = unquote(host) +- user = unquote(user or '') +- passwd = unquote(passwd or '') + + try: + host = socket.gethostbyname(host) +- except socket.error, msg: +- raise urllib2.URLError(msg) ++ except socket.error as msg: ++ raise urllib.error.URLError(msg) + path, attrs = splitattr(req.get_selector()) + dirs = path.split('/') +- dirs = map(unquote, dirs) ++ dirs = list(map(unquote, dirs)) + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: + dirs = dirs[1:] +@@ -336,24 +330,36 @@ class FTPRangeHandler(urllib2.FTPHandler): + if retrlen is not None and retrlen >= 0: + headers += "Content-Length: %d\n" % retrlen + sf = StringIO(headers) +- headers = mimetools.Message(sf) ++ headers = email.message.Message(sf) + return addinfourl(fp, headers, req.get_full_url()) +- except ftplib.all_errors, msg: +- raise IOError, ('ftp error', msg), sys.exc_info()[2] ++ except ftplib.all_errors as msg: ++ error = IOError('ftp error', msg) ++ six.reraise(error.__class__, error, sys.exc_info()[2]) + + def connect_ftp(self, user, passwd, host, port, dirs): + fw = ftpwrapper(user, passwd, host, port, dirs) + return fw + +-class ftpwrapper(urllib.ftpwrapper): ++# Very old functions and classes, undocumented in current Python releases ++if six.PY3: ++ from urllib.request import ftpwrapper, addclosehook ++else: ++ from urllib import ftpwrapper, addclosehook ++ ++ ++class ftpwrapper(ftpwrapper): + # range support note: + # this ftpwrapper code is copied directly from + # urllib. The only enhancement is to add the rest + # argument and pass it on to ftp.ntransfercmd + def retrfile(self, file, type, rest=None): + self.endtransfer() +- if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 +- else: cmd = 'TYPE ' + type; isdir = 0 ++ if type in ('d', 'D'): ++ cmd = 'TYPE A' ++ isdir = 1 ++ else: ++ cmd = 'TYPE ' + type ++ isdir = 0 + try: + self.ftp.voidcmd(cmd) + except ftplib.all_errors: +@@ -364,22 +370,23 @@ class ftpwrapper(urllib.ftpwrapper): + # Use nlst to see if the file exists at all + try: + self.ftp.nlst(file) +- except ftplib.error_perm, reason: +- raise IOError, ('ftp error', reason), sys.exc_info()[2] ++ except ftplib.error_perm as reason: ++ error = IOError('ftp error', reason) ++ six.reraise(error.__class__, error, sys.exc_info()[2]) + # Restore the transfer mode! + self.ftp.voidcmd(cmd) + # Try to retrieve as a file + try: + cmd = 'RETR ' + file + conn = self.ftp.ntransfercmd(cmd, rest) +- except ftplib.error_perm, reason: ++ except ftplib.error_perm as reason: + if str(reason)[:3] == '501': + # workaround for REST not supported error + fp, retrlen = self.retrfile(file, type) + fp = RangeableFileObject(fp, (rest,'')) + return (fp, retrlen) + elif str(reason)[:3] != '550': +- raise IOError, ('ftp error', reason), sys.exc_info()[2] ++ six.reraise(IOError, ('ftp error', reason), sys.exc_info()[2]) + if not conn: + # Set transfer mode to ASCII! + self.ftp.voidcmd('TYPE A') +@@ -458,6 +465,7 @@ def range_tuple_normalize(range_tup): + # check if range is over the entire file + if (fb,lb) == (0,''): return None + # check that the range is valid +- if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb)) ++ if lb != '' and lb < fb: ++ raise RangeError(9, 'Invalid byte range: %s-%s' % (fb, lb)) + return (fb,lb) + +diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py +index f8deeb8..35c091e 100644 +--- a/urlgrabber/grabber.py ++++ b/urlgrabber/grabber.py +@@ -499,22 +499,24 @@ BANDWIDTH THROTTLING + + import os + import sys +-import urlparse + import time ++import collections ++import fcntl ++import pycurl ++import select ++import six ++import socket ++import stat + import string +-import urllib +-import urllib2 +-from httplib import responses +-import mimetools +-import thread + import types +-import stat +-import pycurl ++from email.message import Message + from ftplib import parse150 +-from StringIO import StringIO +-from httplib import HTTPException +-import socket, select, fcntl +-from byterange import range_tuple_normalize, range_tuple_to_header, RangeError ++from six.moves import _thread as thread ++from six.moves import urllib ++from six.moves.http_client import responses, HTTPException ++from urlgrabber.byterange import range_tuple_normalize, range_tuple_to_header, RangeError ++ ++from io import StringIO + + try: + import xattr +@@ -535,7 +537,7 @@ except: + try: + # this part isn't going to do much - need to talk to gettext + from i18n import _ +-except ImportError, msg: ++except ImportError as msg: + def _(st): return st + + ######################################################################## +@@ -635,6 +637,8 @@ def _(st): + + def _to_utf8(obj, errors='replace'): + '''convert 'unicode' to an encoded utf-8 byte string ''' ++ if six.PY3: ++ return obj + # stolen from yum.i18n + if isinstance(obj, unicode): + obj = obj.encode('utf-8', errors) +@@ -791,14 +795,14 @@ class URLParser: + if opts.prefix: + url = self.add_prefix(url, opts.prefix) + +- parts = urlparse.urlparse(url) ++ parts = urllib.parse.urlparse(url) + (scheme, host, path, parm, query, frag) = parts + +- if not scheme or (len(scheme) == 1 and scheme in string.letters): ++ if not scheme or (len(scheme) == 1 and scheme in string.ascii_letters): + # if a scheme isn't specified, we guess that it's "file:" + if url[0] not in '/\\': url = os.path.abspath(url) +- url = 'file:' + urllib.pathname2url(url) +- parts = urlparse.urlparse(url) ++ url = 'file:' + urllib.request.pathname2url(url) ++ parts = urllib.parse.urlparse(url) + quote = 0 # pathname2url quotes, so we won't do it again + + if scheme in ['http', 'https']: +@@ -809,7 +813,7 @@ class URLParser: + if quote: + parts = self.quote(parts) + +- url = urlparse.urlunparse(parts) ++ url = urllib.parse.urlunparse(parts) + return url, parts + + def add_prefix(self, url, prefix): +@@ -833,7 +837,7 @@ class URLParser: + passing into urlgrabber. + """ + (scheme, host, path, parm, query, frag) = parts +- path = urllib.quote(path) ++ path = urllib.parse.quote(path) + return (scheme, host, path, parm, query, frag) + + hexvals = '0123456789ABCDEF' +@@ -850,7 +854,7 @@ class URLParser: + (scheme, host, path, parm, query, frag) = parts + if ' ' in path: + return 1 +- ind = string.find(path, '%') ++ ind = path.find('%') + if ind > -1: + while ind > -1: + if len(path) < ind+3: +@@ -859,7 +863,7 @@ class URLParser: + if code[0] not in self.hexvals or \ + code[1] not in self.hexvals: + return 1 +- ind = string.find(path, '%', ind+1) ++ ind = path.find('%', ind+1) + return 0 + return 1 + +@@ -879,13 +883,13 @@ class URLGrabberOptions: + def __getattr__(self, name): + if self.delegate and hasattr(self.delegate, name): + return getattr(self.delegate, name) +- raise AttributeError, name ++ raise AttributeError(name) + + def raw_throttle(self): + """Calculate raw throttle value from throttle and bandwidth + values. + """ +- if self.throttle <= 0: ++ if self.throttle is None or self.throttle <= 0: + return 0 + elif type(self.throttle) == type(0): + return float(self.throttle) +@@ -937,7 +941,7 @@ class URLGrabberOptions: + def _set_attributes(self, **kwargs): + """Update object attributes with those provided in kwargs.""" + self.__dict__.update(kwargs) +- if kwargs.has_key('range'): ++ if 'range' in kwargs: + # normalize the supplied range value + self.range = range_tuple_normalize(self.range) + if not self.reget in [None, 'simple', 'check_timestamp']: +@@ -1006,7 +1010,7 @@ class URLGrabberOptions: + return self.format() + + def format(self, indent=' '): +- keys = self.__dict__.keys() ++ keys = list(self.__dict__.keys()) + if self.delegate is not None: + keys.remove('delegate') + keys.sort() +@@ -1026,7 +1030,7 @@ def _do_raise(obj): + def _run_callback(cb, obj): + if not cb: + return +- if callable(cb): ++ if isinstance(cb, collections.Callable): + return cb(obj) + cb, arg, karg = cb + return cb(obj, *arg, **karg) +@@ -1058,16 +1062,15 @@ class URLGrabber(object): + tries = tries + 1 + exception = None + callback = None +- if DEBUG: DEBUG.info('attempt %i/%s: %s', +- tries, opts.retry, args[0]) ++ if DEBUG: DEBUG.info('attempt %i/%s: %s', tries, opts.retry, args[0]) + try: +- r = apply(func, (opts,) + args, {}) ++ r = func(*(opts,) + args, **{}) + if DEBUG: DEBUG.info('success') + return r +- except URLGrabError, e: ++ except URLGrabError as e: + exception = e + callback = opts.failure_callback +- except KeyboardInterrupt, e: ++ except KeyboardInterrupt as e: + exception = e + callback = opts.interrupt_callback + if not callback: +@@ -1082,13 +1085,13 @@ class URLGrabber(object): + + if (opts.retry is None) or (tries == opts.retry): + if DEBUG: DEBUG.info('retries exceeded, re-raising') +- raise ++ raise exception + + retrycode = getattr(exception, 'errno', None) + if (retrycode is not None) and (retrycode not in opts.retrycodes): + if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising', + retrycode, opts.retrycodes) +- raise ++ raise exception + + def urlopen(self, url, opts=None, **kwargs): + """open the url and return a file object +@@ -1119,14 +1122,14 @@ class URLGrabber(object): + (scheme, host, path, parm, query, frag) = parts + opts.find_proxy(url, scheme) + if filename is None: +- filename = os.path.basename( urllib.unquote(path) ) ++ filename = os.path.basename( urllib.parse.unquote(path) ) + if not filename: + # This is better than nothing. + filename = 'index.html' + if scheme == 'file' and not opts.copy_local: + # just return the name of the local file - don't make a + # copy currently +- path = urllib.url2pathname(path) ++ path = urllib.request.url2pathname(path) + if host: + path = os.path.normpath('//' + host + path) + if not os.path.exists(path): +@@ -1170,7 +1173,7 @@ class URLGrabber(object): + + try: + return self._retry(opts, retryfunc, url, filename) +- except URLGrabError, e: ++ except URLGrabError as e: + _TH.update(url, 0, 0, e) + opts.exception = e + return _run_callback(opts.failfunc, opts) +@@ -1219,7 +1222,7 @@ class URLGrabber(object): + + def _make_callback(self, callback_obj): + # not used, left for compatibility +- if callable(callback_obj): ++ if isinstance(callback_obj, collections.Callable): + return callback_obj, (), {} + else: + return callback_obj +@@ -1235,13 +1238,13 @@ class PyCurlFileObject(object): + self._hdr_dump = '' + self._parsed_hdr = None + self.url = url +- self.scheme = urlparse.urlsplit(self.url)[0] ++ self.scheme = urllib.parse.urlsplit(self.url)[0] + self.filename = filename + self.append = False + self.reget_time = None + self.opts = opts + if self.opts.reget == 'check_timestamp': +- raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this." ++ raise NotImplementedError("check_timestamp regets are not implemented in this ver of urlgrabber. Please report this.") + self._complete = False + self._rbuf = '' + self._rbufsize = 1024*8 +@@ -1266,7 +1269,7 @@ class PyCurlFileObject(object): + + if hasattr(self.fo, name): + return getattr(self.fo, name) +- raise AttributeError, name ++ raise AttributeError(name) + + def _retrieve(self, buf): + try: +@@ -1280,7 +1283,7 @@ class PyCurlFileObject(object): + if self.opts.progress_obj: + size = self.size + self._reget_length + self.opts.progress_obj.start(self._prog_reportname, +- urllib.unquote(self.url), ++ urllib.parse.unquote(self.url), + self._prog_basename, + size=size, + text=self.opts.text) +@@ -1295,10 +1298,16 @@ class PyCurlFileObject(object): + start = self._range[0] - pos + stop = self._range[1] - pos + if start < len(buf) and stop > 0: +- self.fo.write(buf[max(start, 0):stop]) ++ if not six.PY3 or isinstance(self.fo, StringIO): ++ self.fo.write(buf[max(start, 0):stop].decode('utf-8')) ++ else: ++ self.fo.write(buf[max(start, 0):stop]) + else: +- self.fo.write(buf) +- except IOError, e: ++ if not six.PY3 or isinstance(self.fo, StringIO): ++ self.fo.write(buf.decode('utf-8')) ++ else: ++ self.fo.write(buf) ++ except IOError as e: + self._cb_error = URLGrabError(16, exception2msg(e)) + return -1 + return len(buf) +@@ -1319,10 +1328,12 @@ class PyCurlFileObject(object): + # but we can't do that w/o making it do 2 connects, which sucks + # so we cheat and stuff it in here in the hdr_retrieve + if self.scheme in ['http','https']: +- if buf.lower().find('content-length:') != -1: +- length = buf.split(':')[1] ++ content_length_str = 'content-length:' if not six.PY3 else b'content-length:' ++ if buf.lower().find(content_length_str) != -1: ++ split_str = ':' if not six.PY3 else b':' ++ length = buf.split(split_str)[1] + self.size = int(length) +- elif (self.append or self.opts.range) and self._hdr_dump == '' and ' 200 ' in buf: ++ elif (self.append or self.opts.range) and self._hdr_dump == '' and b' 200 ' in buf: + # reget was attempted but server sends it all + # undo what we did in _build_range() + self.append = False +@@ -1333,23 +1344,26 @@ class PyCurlFileObject(object): + self.fo.truncate(0) + elif self.scheme in ['ftp']: + s = None +- if buf.startswith('213 '): ++ if buf.startswith(b'213 '): + s = buf[3:].strip() + if len(s) >= 14: + s = None # ignore MDTM responses +- elif buf.startswith('150 '): +- s = parse150(buf) ++ elif buf.startswith(b'150 '): ++ s = parse150(buf if not six.PY3 else buf.decode('utf-8')) + if s: + self.size = int(s) + +- if buf.lower().find('location') != -1: +- location = ':'.join(buf.split(':')[1:]) ++ location_str = 'location' if not six.PY3 else b'location' ++ if buf.lower().find(location_str) != -1: ++ buf_compat = buf if not six.PY3 else buf.decode('utf-8') ++ location = ':'.join(buf_compat.split(':')[1:]) + location = location.strip() +- self.scheme = urlparse.urlsplit(location)[0] ++ self.scheme = urllib.parse.urlsplit(location)[0] + self.url = location + +- self._hdr_dump += buf +- if len(self._hdr_dump) != 0 and buf == '\r\n': ++ self._hdr_dump += buf if not six.PY3 else buf.decode('utf-8') ++ end_str = '\r\n' if not six.PY3 else b'\r\n' ++ if len(self._hdr_dump) != 0 and buf == end_str: + self._hdr_ended = True + if DEBUG: DEBUG.debug('header ended:') + +@@ -1365,7 +1379,7 @@ class PyCurlFileObject(object): + hdrfp = StringIO() + hdrfp.write(self._hdr_dump[statusend:]) + hdrfp.seek(0) +- self._parsed_hdr = mimetools.Message(hdrfp) ++ self._parsed_hdr = Message(hdrfp) + return self._parsed_hdr + + hdr = property(_return_hdr_obj) +@@ -1490,7 +1504,7 @@ class PyCurlFileObject(object): + + try: + self.curl_obj.perform() +- except pycurl.error, e: ++ except pycurl.error as e: + # XXX - break some of these out a bit more clearly + # to other URLGrabErrors from + # http://curl.haxx.se/libcurl/c/libcurl-errors.html +@@ -1498,7 +1512,7 @@ class PyCurlFileObject(object): + + code = self.http_code + errcode = e.args[0] +- errurl = urllib.unquote(self.url) ++ errurl = urllib.parse.unquote(self.url) + + if self._error[0]: + errcode = self._error[0] +@@ -1588,7 +1602,7 @@ class PyCurlFileObject(object): + if self._error[1]: + msg = self._error[1] + err = URLGrabError(14, msg) +- err.url = urllib.unquote(self.url) ++ err.url = urllib.parse.unquote(self.url) + raise err + + def _do_open(self): +@@ -1605,7 +1619,7 @@ class PyCurlFileObject(object): + def _build_range(self): + reget_length = 0 + rt = None +- if self.opts.reget and type(self.filename) in types.StringTypes: ++ if self.opts.reget and type(self.filename) in (type(str()), six.text_type): + # we have reget turned on and we're dumping to a file + try: + s = os.stat(self.filename) +@@ -1655,22 +1669,22 @@ class PyCurlFileObject(object): + else: + fo = opener.open(req) + hdr = fo.info() +- except ValueError, e: ++ except ValueError as e: + err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, )) + err.url = self.url + raise err + +- except RangeError, e: ++ except RangeError as e: + err = URLGrabError(9, _('%s on %s') % (e, self.url)) + err.url = self.url + raise err +- except urllib2.HTTPError, e: ++ except urllib.error.HTTPError as e: + new_e = URLGrabError(14, _('%s on %s') % (e, self.url)) + new_e.code = e.code + new_e.exception = e + new_e.url = self.url + raise new_e +- except IOError, e: ++ except IOError as e: + if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): + err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) + err.url = self.url +@@ -1680,12 +1694,12 @@ class PyCurlFileObject(object): + err.url = self.url + raise err + +- except OSError, e: ++ except OSError as e: + err = URLGrabError(5, _('%s on %s') % (e, self.url)) + err.url = self.url + raise err + +- except HTTPException, e: ++ except HTTPException as e: + err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \ + (e.__class__.__name__, self.url, e)) + err.url = self.url +@@ -1700,19 +1714,21 @@ class PyCurlFileObject(object): + if self._complete: + return + _was_filename = False +- if type(self.filename) in types.StringTypes and self.filename: ++ if self.filename and type(self.filename) in (type(str()), six.text_type): + _was_filename = True + self._prog_reportname = str(self.filename) + self._prog_basename = os.path.basename(self.filename) + +- if self.append: mode = 'ab' +- else: mode = 'wb' ++ if self.append: ++ mode = 'ab' ++ else: ++ mode = 'wb' + +- if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \ +- (self.filename, mode)) ++ if DEBUG: ++ DEBUG.info('opening local file "%s" with mode %s' % (self.filename, mode)) + try: + self.fo = open(self.filename, mode) +- except IOError, e: ++ except IOError as e: + err = URLGrabError(16, _(\ + 'error opening local file from %s, IOError: %s') % (self.url, e)) + err.url = self.url +@@ -1731,7 +1747,7 @@ class PyCurlFileObject(object): + + try: + self._do_perform() +- except URLGrabError, e: ++ except URLGrabError as e: + self.fo.flush() + self.fo.close() + raise e +@@ -1754,7 +1770,7 @@ class PyCurlFileObject(object): + if mod_time != -1: + try: + os.utime(self.filename, (mod_time, mod_time)) +- except OSError, e: ++ except OSError as e: + err = URLGrabError(16, _(\ + 'error setting timestamp on file %s from %s, OSError: %s') + % (self.filename, self.url, e)) +@@ -1763,7 +1779,7 @@ class PyCurlFileObject(object): + # re open it + try: + self.fo = open(self.filename, 'r') +- except IOError, e: ++ except IOError as e: + err = URLGrabError(16, _(\ + 'error opening file from %s, IOError: %s') % (self.url, e)) + err.url = self.url +@@ -1809,25 +1825,27 @@ class PyCurlFileObject(object): + else: readamount = min(amt, self._rbufsize) + try: + new = self.fo.read(readamount) +- except socket.error, e: ++ except socket.error as e: + err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e)) + err.url = self.url + raise err + +- except socket.timeout, e: ++ except socket.timeout as e: + raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) + err.url = self.url + raise err + +- except IOError, e: ++ except IOError as e: + raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e)) + err.url = self.url + raise err + + newsize = len(new) +- if not newsize: break # no more to read ++ if not newsize: ++ break # no more to read + +- if amt: amt = amt - newsize ++ if amt: ++ amt = amt - newsize + buf.append(new) + bufsize = bufsize + newsize + self._tsize = newsize +@@ -1835,7 +1853,7 @@ class PyCurlFileObject(object): + #if self.opts.progress_obj: + # self.opts.progress_obj.update(self._amount_read) + +- self._rbuf = string.join(buf, '') ++ self._rbuf = ''.join(buf) + return + + def _progress_update(self, download_total, downloaded, upload_total, uploaded): +@@ -1879,12 +1897,12 @@ class PyCurlFileObject(object): + if not self._complete: self._do_grab() + return self.fo.readline() + +- i = string.find(self._rbuf, '\n') ++ i = self._rbuf.find('\n') + while i < 0 and not (0 < limit <= len(self._rbuf)): + L = len(self._rbuf) + self._fill_buffer(L + self._rbufsize) + if not len(self._rbuf) > L: break +- i = string.find(self._rbuf, '\n', L) ++ i = self._rbuf.find('\n', L) + + if i < 0: i = len(self._rbuf) + else: i = i+1 +@@ -1968,9 +1986,9 @@ def _dumps(v): + if v is None: return 'None' + if v is True: return 'True' + if v is False: return 'False' +- if type(v) in (int, long, float): ++ if type(v) in six.integer_types + (float,): + return str(v) +- if type(v) == unicode: ++ if not six.PY3 and type(v) == unicode: + v = v.encode('UTF8') + if type(v) == str: + def quoter(c): return _quoter_map.get(c, c) +@@ -1979,17 +1997,21 @@ def _dumps(v): + return "(%s)" % ','.join(map(_dumps, v)) + if type(v) == list: + return "[%s]" % ','.join(map(_dumps, v)) +- raise TypeError, 'Can\'t serialize %s' % v ++ raise TypeError('Can\'t serialize %s' % v) + + def _loads(s): + def decode(v): + if v == 'None': return None + if v == 'True': return True + if v == 'False': return False +- try: return int(v) +- except ValueError: pass +- try: return float(v) +- except ValueError: pass ++ try: ++ return int(v) ++ except ValueError: ++ pass ++ try: ++ return float(v) ++ except ValueError: ++ pass + if len(v) >= 2 and v[0] == v[-1] == "'": + ret = []; i = 1 + while True: +@@ -2033,9 +2055,11 @@ def _readlines(fd): + buf = os.read(fd, 4096) + if not buf: return None + # whole lines only, no buffering +- while buf[-1] != '\n': ++ buf_compat = buf if not six.PY3 else buf.decode('utf-8') ++ while buf_compat[-1] != '\n': + buf += os.read(fd, 4096) +- return buf[:-1].split('\n') ++ buf_compat = buf if not six.PY3 else buf.decode('utf-8') ++ return buf_compat[:-1].split('\n') + + import subprocess + +@@ -2071,7 +2095,8 @@ class _ExternalDownloader: + arg = [] + for k in self._options: + v = getattr(opts, k) +- if v is None: continue ++ if v is None: ++ continue + arg.append('%s=%s' % (k, _dumps(v))) + if opts.progress_obj and opts.multi_progress_obj: + arg.append('progress_obj=True') +@@ -2080,7 +2105,8 @@ class _ExternalDownloader: + + self.cnt += 1 + self.running[self.cnt] = opts +- os.write(self.stdin, arg +'\n') ++ result = arg +'\n' ++ os.write(self.stdin, result if not six.PY3 else result.encode('utf-8')) + + def perform(self): + ret = [] +@@ -2091,7 +2117,7 @@ class _ExternalDownloader: + for line in lines: + # parse downloader output + line = line.split(' ', 6) +- _id, size = map(int, line[:2]) ++ _id, size = list(map(int, line[:2])) + if len(line) == 2: + self.running[_id]._progress.update(size) + continue +@@ -2121,7 +2147,7 @@ class _ExternalDownloaderPool: + self.cache = {} + + def start(self, opts): +- host = urlparse.urlsplit(opts.url).netloc ++ host = urllib.parse.urlsplit(opts.url).netloc + dl = self.cache.pop(host, None) + if not dl: + dl = _ExternalDownloader() +@@ -2144,8 +2170,9 @@ class _ExternalDownloaderPool: + ret.extend(done) + + # dl finished, move it to the cache +- host = urlparse.urlsplit(done[0][0].url).netloc +- if host in self.cache: self.cache[host].abort() ++ host = urllib.parse.urlsplit(done[0][0].url).netloc ++ if host in self.cache: ++ self.cache[host].abort() + self.epoll.unregister(fd) + self.cache[host] = self.running.pop(fd) + return ret +@@ -2189,7 +2216,7 @@ def parallel_wait(meter=None): + opts.tries = tries + try: + dl.start(opts) +- except OSError, e: ++ except OSError as e: + # can't spawn downloader, give up immediately + opts.exception = URLGrabError(5, exception2msg(e)) + _run_callback(opts.failfunc, opts) +@@ -2212,7 +2239,8 @@ def parallel_wait(meter=None): + if ug_err is None: + if opts.checkfunc: + try: _run_callback(opts.checkfunc, opts) +- except URLGrabError, ug_err: pass ++ except URLGrabError: ++ pass + + if opts.progress_obj: + if opts.multi_progress_obj: +@@ -2242,8 +2270,9 @@ def parallel_wait(meter=None): + retry = opts.retry or 0 + if opts.failure_callback: + opts.exception = ug_err +- try: _run_callback(opts.failure_callback, opts) +- except URLGrabError, ug_err: ++ try: ++ _run_callback(opts.failure_callback, opts) ++ except URLGrabError: + retry = 0 # no retries + if opts.tries < retry and ug_err.errno in opts.retrycodes: + start(opts, opts.tries + 1) # simple retry +@@ -2293,8 +2322,7 @@ def parallel_wait(meter=None): + # check global limit + while len(dl.running) >= default_grabber.opts.max_connections: + perform() +- if DEBUG: +- DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections) ++ if DEBUG: DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections) + + if opts.mirror_group: + mg, errors, failed, removed = opts.mirror_group +@@ -2345,12 +2373,12 @@ def parallel_wait(meter=None): + limit = 1 + while host_con.get(key, 0) >= (limit or 2): + perform() +- if DEBUG: +- DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit) ++ if DEBUG: DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit) + + start(opts, 1) +- except IOError, e: +- if e.errno != 4: raise ++ except IOError as e: ++ if e.errno != 4: ++ raise + raise KeyboardInterrupt + + finally: +@@ -2399,7 +2427,7 @@ class _TH: + def update(url, dl_size, dl_time, ug_err, baseurl=None): + # Use hostname from URL. If it's a file:// URL, use baseurl. + # If no baseurl, do not update timedhosts. +- host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl ++ host = urllib.parse.urlsplit(url).netloc.split('@')[-1] or baseurl + if not host: return + + _TH.load() +@@ -2431,7 +2459,7 @@ class _TH: + _TH.load() + + # Use just the hostname, unless it's a file:// baseurl. +- host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl ++ host = urllib.parse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl + + default_speed = default_grabber.opts.default_speed + try: speed, fail, ts = _TH.hosts[host] +@@ -2447,68 +2475,67 @@ class _TH: + def _main_test(): + try: url, filename = sys.argv[1:3] + except ValueError: +- print 'usage:', sys.argv[0], \ +- ' [copy_local=0|1] [close_connection=0|1]' ++ print('usage:', sys.argv[0], \ ++ ' [copy_local=0|1] [close_connection=0|1]') + sys.exit() + + kwargs = {} + for a in sys.argv[3:]: +- k, v = string.split(a, '=', 1) ++ k, v = a.split('=', 1) + kwargs[k] = int(v) + + set_throttle(1.0) + set_bandwidth(32 * 1024) +- print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, +- default_grabber.bandwidth) ++ print("throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, ++ default_grabber.bandwidth)) + +- try: from progress import text_progress_meter +- except ImportError, e: pass ++ try: from .progress import text_progress_meter ++ except ImportError: pass + else: kwargs['progress_obj'] = text_progress_meter() + +- try: name = apply(urlgrab, (url, filename), kwargs) +- except URLGrabError, e: print e +- else: print 'LOCAL FILE:', name ++ try: name = urlgrab(*(url, filename), **kwargs) ++ except URLGrabError as e: print(e) ++ else: print('LOCAL FILE:', name) + + + def _retry_test(): + try: url, filename = sys.argv[1:3] + except ValueError: +- print 'usage:', sys.argv[0], \ +- ' [copy_local=0|1] [close_connection=0|1]' ++ print('usage:', sys.argv[0], \ ++ ' [copy_local=0|1] [close_connection=0|1]') + sys.exit() + + kwargs = {} + for a in sys.argv[3:]: +- k, v = string.split(a, '=', 1) ++ k, v = a.split('=', 1) + kwargs[k] = int(v) + +- try: from progress import text_progress_meter +- except ImportError, e: pass ++ try: from .progress import text_progress_meter ++ except ImportError: pass + else: kwargs['progress_obj'] = text_progress_meter() + + def cfunc(filename, hello, there='foo'): +- print hello, there ++ print(hello, there) + import random + rnum = random.random() + if rnum < .5: +- print 'forcing retry' ++ print('forcing retry') + raise URLGrabError(-1, 'forcing retry') + if rnum < .75: +- print 'forcing failure' ++ print('forcing failure') + raise URLGrabError(-2, 'forcing immediate failure') +- print 'success' ++ print('success') + return + + kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'}) +- try: name = apply(retrygrab, (url, filename), kwargs) +- except URLGrabError, e: print e +- else: print 'LOCAL FILE:', name ++ try: name = retrygrab(*(url, filename), **kwargs) ++ except URLGrabError as e: print(e) ++ else: print('LOCAL FILE:', name) + + def _file_object_test(filename=None): +- import cStringIO + if filename is None: + filename = __file__ +- print 'using file "%s" for comparisons' % filename ++ print('using file "%s" for comparisons' % filename) + fo = open(filename) + s_input = fo.read() + fo.close() +@@ -2517,14 +2544,13 @@ def _file_object_test(filename=None): + _test_file_object_readall, + _test_file_object_readline, + _test_file_object_readlines]: +- fo_input = cStringIO.StringIO(s_input) +- fo_output = cStringIO.StringIO() ++ fo_input = StringIO(s_input) ++ fo_output = StringIO() + wrapper = PyCurlFileObject(fo_input, None, 0) +- print 'testing %-30s ' % testfunc.__name__, +- testfunc(wrapper, fo_output) ++ print('testing %-30s ' % testfunc.__name__, testfunc(wrapper, fo_output)) + s_output = fo_output.getvalue() +- if s_output == s_input: print 'passed' +- else: print 'FAILED' ++ if s_output == s_input: print('passed') ++ else: print('FAILED') + + def _test_file_object_smallread(wrapper, fo_output): + while 1: +@@ -2544,7 +2570,7 @@ def _test_file_object_readline(wrapper, fo_output): + + def _test_file_object_readlines(wrapper, fo_output): + li = wrapper.readlines() +- fo_output.write(string.join(li, '')) ++ fo_output.write(''.join(li)) + + if __name__ == '__main__': + _main_test() +diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py +index 988a309..f3c2664 100644 +--- a/urlgrabber/mirror.py ++++ b/urlgrabber/mirror.py +@@ -92,13 +92,14 @@ CUSTOMIZATION + + + import sys ++import six + import random +-import thread # needed for locking to make this threadsafe ++from six.moves import _thread as thread # needed for locking to make this threadsafe + +-from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 +-from grabber import _run_callback, _do_raise +-from grabber import exception2msg +-from grabber import _TH ++from urlgrabber.grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 ++from urlgrabber.grabber import _run_callback, _do_raise ++from urlgrabber.grabber import exception2msg ++from urlgrabber.grabber import _TH + + def _(st): + return st +@@ -286,7 +287,7 @@ class MirrorGroup: + def _parse_mirrors(self, mirrors): + parsed_mirrors = [] + for m in mirrors: +- if isinstance(m, basestring): ++ if isinstance(m, six.string_types): + m = {'mirror': _to_utf8(m)} + parsed_mirrors.append(m) + return parsed_mirrors +@@ -423,7 +424,7 @@ class MirrorGroup: + if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) + try: + return func_ref( *(fullurl,), opts=opts, **kw ) +- except URLGrabError, e: ++ except URLGrabError as e: + if DEBUG: DEBUG.info('MIRROR: failed') + gr.errors.append((fullurl, exception2msg(e))) + obj = CallbackObject() +@@ -446,7 +447,7 @@ class MirrorGroup: + func = 'urlgrab' + try: + return self._mirror_try(func, url, kw) +- except URLGrabError, e: ++ except URLGrabError as e: + obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs) + return _run_callback(kwargs.get('failfunc', _do_raise), obj) + +diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py +index 9b77c54..2235397 100644 +--- a/urlgrabber/progress.py ++++ b/urlgrabber/progress.py +@@ -19,9 +19,10 @@ + + + import sys ++import six + import time + import math +-import thread ++from six.moves import _thread as thread + import fcntl + import struct + import termios +@@ -606,7 +607,7 @@ class TextMultiFileMeter(MultiFileMeter): + try: + format = "%-30.30s %6.6s %s" + fn = meter.text or meter.basename +- if type(message) in (type(''), type(u'')): ++ if type(message) in (type(''), type('')): + message = message.splitlines() + if not message: message = [''] + out = '%-79s' % (format % (fn, 'FAILED', message[0] or '')) +@@ -778,7 +779,7 @@ def format_number(number, SI=0, space=' '): + depth = depth + 1 + number = number / step + +- if type(number) == type(1) or type(number) == type(1L): ++ if type(number) in six.integer_types: + # it's an int or a long, which means it didn't get divided, + # which means it's already short enough + format = '%i%s%s' +@@ -806,7 +807,7 @@ def _tst(fn, cur, tot, beg, size, *args): + tm.end(size) + + def _mtst(datas, *args): +- print '-' * 79 ++ print('-' * 79) + tm = TextMultiFileMeter(threaded=False) + + dl_sizes = {} diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 42a39b6..c810c38 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,18 +3,21 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.10.1 -Release: 2%{?dist} +Release: 3%{?dist} Source0: http://urlgrabber.baseurl.org/download/urlgrabber-%{version}.tar.gz Patch1: urlgrabber-HEAD.patch +Patch2: BZ-1051554-speed-on-404-mirror.patch +Patch3: port-to-python3.patch +Patch4: port-tests-to-python3.patch License: LGPLv2+ Group: Development/Libraries BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root BuildArch: noarch -BuildRequires: python-devel, python-pycurl +BuildRequires: python-devel, python-pycurl, python-six Url: http://urlgrabber.baseurl.org/ Provides: urlgrabber = %{version}-%{release} -Requires: python-pycurl +Requires: python-pycurl, python-six %description A high-level cross-protocol url-grabber for python supporting HTTP, FTP @@ -24,6 +27,9 @@ authentication, proxies and more. %prep %setup -q -n urlgrabber-%{version} %patch1 -p1 +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 %build python setup.py build @@ -46,6 +52,10 @@ rm -rf $RPM_BUILD_ROOT %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down %changelog +* Fri Aug 29 2014 Valentina Mukhamedzhanova - 3.10.1-3 +- Don't set speed=0 on a new mirror that 404'd. BZ 1051554 +- Support both Python 2 and 3. BZ 985288 + * Sun Aug 3 2014 Tom Callaway - 3.10.1-2 - fix license handling