parent
9a2719055a
commit
b791d6f370
@ -1,31 +0,0 @@
|
||||
From 62f94b534289b7ab0bda879962bf6efb124a9930 Mon Sep 17 00:00:00 2001
|
||||
From: Tomas Radej <tradej@redhat.com>
|
||||
Date: Tue, 2 Sep 2014 12:52:50 +0200
|
||||
Subject: [PATCH] StringIO patch
|
||||
|
||||
---
|
||||
urlgrabber/grabber.py | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
||||
index 35c091e..26335d1 100644
|
||||
--- a/urlgrabber/grabber.py
|
||||
+++ b/urlgrabber/grabber.py
|
||||
@@ -1298,12 +1298,12 @@ class PyCurlFileObject(object):
|
||||
start = self._range[0] - pos
|
||||
stop = self._range[1] - pos
|
||||
if start < len(buf) and stop > 0:
|
||||
- if not six.PY3 or isinstance(self.fo, StringIO):
|
||||
+ if not six.PY3 and isinstance(self.fo, StringIO):
|
||||
self.fo.write(buf[max(start, 0):stop].decode('utf-8'))
|
||||
else:
|
||||
self.fo.write(buf[max(start, 0):stop])
|
||||
else:
|
||||
- if not six.PY3 or isinstance(self.fo, StringIO):
|
||||
+ if not six.PY3 and isinstance(self.fo, StringIO):
|
||||
self.fo.write(buf.decode('utf-8'))
|
||||
else:
|
||||
self.fo.write(buf)
|
||||
--
|
||||
1.9.3
|
||||
|
@ -0,0 +1,433 @@
|
||||
diff --git a/test/test_grabber.py b/test/test_grabber.py
|
||||
index bd36d66..bd54329 100644
|
||||
--- a/test/test_grabber.py
|
||||
+++ b/test/test_grabber.py
|
||||
@@ -42,7 +42,7 @@ from urlgrabber.progress import text_progress_meter
|
||||
class FileObjectTests(TestCase):
|
||||
|
||||
def setUp(self):
|
||||
- self.filename = tempfile.mktemp()
|
||||
+ _, self.filename = tempfile.mkstemp()
|
||||
fo = open(self.filename, 'wb')
|
||||
fo.write(reference_data.encode('utf-8'))
|
||||
fo.close()
|
||||
@@ -61,35 +61,36 @@ class FileObjectTests(TestCase):
|
||||
def test_readall(self):
|
||||
"PYCurlFileObject .read() method"
|
||||
s = self.wrapper.read()
|
||||
- self.fo_output.write(s)
|
||||
+ self.fo_output.write(unicode(s) if not six.PY3 else s)
|
||||
self.assert_(reference_data == self.fo_output.getvalue())
|
||||
|
||||
def test_readline(self):
|
||||
"PyCurlFileObject .readline() method"
|
||||
while 1:
|
||||
s = self.wrapper.readline()
|
||||
- self.fo_output.write(s)
|
||||
+ self.fo_output.write(unicode(s) if not six.PY3 else s)
|
||||
if not s: break
|
||||
self.assert_(reference_data == self.fo_output.getvalue())
|
||||
|
||||
def test_readlines(self):
|
||||
"PyCurlFileObject .readlines() method"
|
||||
li = self.wrapper.readlines()
|
||||
- self.fo_output.write(''.join(li))
|
||||
+ out = ''.join(li)
|
||||
+ self.fo_output.write(unicode(out) if not six.PY3 else out)
|
||||
self.assert_(reference_data == self.fo_output.getvalue())
|
||||
|
||||
def test_smallread(self):
|
||||
"PyCurlFileObject .read(N) with small N"
|
||||
while 1:
|
||||
s = self.wrapper.read(23)
|
||||
- self.fo_output.write(s)
|
||||
+ self.fo_output.write(unicode(s) if not six.PY3 else s)
|
||||
if not s: break
|
||||
self.assert_(reference_data == self.fo_output.getvalue())
|
||||
|
||||
class HTTPTests(TestCase):
|
||||
def test_reference_file(self):
|
||||
"download reference file via HTTP"
|
||||
- filename = tempfile.mktemp()
|
||||
+ _, filename = tempfile.mkstemp()
|
||||
grabber.urlgrab(ref_http, filename)
|
||||
|
||||
fo = open(filename, 'rb' if not six.PY3 else 'r')
|
||||
@@ -123,7 +124,7 @@ class URLGrabberModuleTestCase(TestCase):
|
||||
|
||||
def test_urlgrab(self):
|
||||
"module-level urlgrab() function"
|
||||
- outfile = tempfile.mktemp()
|
||||
+ _, outfile = tempfile.mkstemp()
|
||||
filename = urlgrabber.urlgrab('http://www.python.org',
|
||||
filename=outfile)
|
||||
os.unlink(outfile)
|
||||
@@ -367,7 +368,7 @@ class CheckfuncTestCase(TestCase):
|
||||
def setUp(self):
|
||||
cf = (self._checkfunc, ('foo',), {'bar': 'baz'})
|
||||
self.g = grabber.URLGrabber(checkfunc=cf)
|
||||
- self.filename = tempfile.mktemp()
|
||||
+ _, self.filename = tempfile.mkstemp()
|
||||
self.data = short_reference_data
|
||||
|
||||
def tearDown(self):
|
||||
@@ -440,7 +441,7 @@ class RegetTestBase:
|
||||
def setUp(self):
|
||||
self.ref = short_reference_data
|
||||
self.grabber = grabber.URLGrabber(reget='check_timestamp')
|
||||
- self.filename = tempfile.mktemp()
|
||||
+ _, self.filename = tempfile.mkstemp()
|
||||
self.hl = len(self.ref) / 2
|
||||
self.url = 'OVERRIDE THIS'
|
||||
|
||||
@@ -522,7 +523,7 @@ class HTTPRegetTests(FTPRegetTests):
|
||||
class FileRegetTests(HTTPRegetTests):
|
||||
def setUp(self):
|
||||
self.ref = short_reference_data
|
||||
- tmp = tempfile.mktemp()
|
||||
+ _, tmp = tempfile.mkstemp()
|
||||
tmpfo = open(tmp, 'wb' if not six.PY3 else 'w')
|
||||
tmpfo.write(self.ref)
|
||||
tmpfo.close()
|
||||
@@ -534,7 +535,7 @@ class FileRegetTests(HTTPRegetTests):
|
||||
|
||||
self.grabber = grabber.URLGrabber(reget='check_timestamp',
|
||||
copy_local=1)
|
||||
- self.filename = tempfile.mktemp()
|
||||
+ _, self.filename = tempfile.mkstemp()
|
||||
self.hl = len(self.ref) / 2
|
||||
|
||||
def tearDown(self):
|
||||
diff --git a/test/test_mirror.py b/test/test_mirror.py
|
||||
index c46cd33..b923dd1 100644
|
||||
--- a/test/test_mirror.py
|
||||
+++ b/test/test_mirror.py
|
||||
@@ -50,7 +50,7 @@ class BasicTests(TestCase):
|
||||
|
||||
def test_urlgrab(self):
|
||||
"""MirrorGroup.urlgrab"""
|
||||
- filename = tempfile.mktemp()
|
||||
+ _, filename = tempfile.mkstemp()
|
||||
url = 'short_reference'
|
||||
self.mg.urlgrab(url, filename)
|
||||
|
||||
@@ -84,7 +84,7 @@ class SubclassTests(TestCase):
|
||||
def fetchwith(self, mgclass):
|
||||
self.mg = mgclass(self.g, self.fullmirrors)
|
||||
|
||||
- filename = tempfile.mktemp()
|
||||
+ _, filename = tempfile.mkstemp()
|
||||
url = 'short_reference'
|
||||
self.mg.urlgrab(url, filename)
|
||||
|
||||
@@ -137,7 +137,7 @@ class BadMirrorTests(TestCase):
|
||||
|
||||
def test_simple_grab(self):
|
||||
"""test that a bad mirror raises URLGrabError"""
|
||||
- filename = tempfile.mktemp()
|
||||
+ _, filename = tempfile.mkstemp()
|
||||
url = 'reference'
|
||||
self.assertRaises(URLGrabError, self.mg.urlgrab, url, filename)
|
||||
|
||||
@@ -150,7 +150,7 @@ class FailoverTests(TestCase):
|
||||
|
||||
def test_simple_grab(self):
|
||||
"""test that a the MG fails over past a bad mirror"""
|
||||
- filename = tempfile.mktemp()
|
||||
+ _, filename = tempfile.mkstemp()
|
||||
url = 'reference'
|
||||
elist = []
|
||||
def cb(e, elist=elist): elist.append(e)
|
||||
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
|
||||
index ffaed8e..95287fc 100644
|
||||
--- a/urlgrabber/byterange.py
|
||||
+++ b/urlgrabber/byterange.py
|
||||
@@ -27,7 +27,7 @@ from six.moves import urllib
|
||||
|
||||
DEBUG = None
|
||||
|
||||
-from io import StringIO
|
||||
+from io import BytesIO
|
||||
|
||||
class RangeError(IOError):
|
||||
"""Error raised when an unsatisfiable range is requested."""
|
||||
@@ -238,8 +238,8 @@ class FileRangeHandler(urllib.request.FileHandler):
|
||||
raise RangeError(9, 'Requested Range Not Satisfiable')
|
||||
size = (lb - fb)
|
||||
fo = RangeableFileObject(fo, (fb,lb))
|
||||
- headers = email.message.Message(StringIO(
|
||||
- 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
|
||||
+ headers = email.message.Message(BytesIO(
|
||||
+ b'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
|
||||
(mtype or 'text/plain', size, modified)))
|
||||
return urllib.addinfourl(fo, headers, 'file:'+file)
|
||||
|
||||
@@ -323,13 +323,13 @@ class FTPRangeHandler(urllib.request.FTPHandler):
|
||||
fp = RangeableFileObject(fp, (0,retrlen))
|
||||
# -- range support modifications end here
|
||||
|
||||
- headers = ""
|
||||
+ headers = b""
|
||||
mtype = mimetypes.guess_type(req.get_full_url())[0]
|
||||
if mtype:
|
||||
- headers += "Content-Type: %s\n" % mtype
|
||||
+ headers += b"Content-Type: %s\n" % mtype
|
||||
if retrlen is not None and retrlen >= 0:
|
||||
- headers += "Content-Length: %d\n" % retrlen
|
||||
- sf = StringIO(headers)
|
||||
+ headers += b"Content-Length: %d\n" % retrlen
|
||||
+ sf = BytesIO(headers)
|
||||
headers = email.message.Message(sf)
|
||||
return addinfourl(fp, headers, req.get_full_url())
|
||||
except ftplib.all_errors as msg:
|
||||
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
|
||||
index 35c091e..69cd113 100644
|
||||
--- a/urlgrabber/grabber.py
|
||||
+++ b/urlgrabber/grabber.py
|
||||
@@ -516,7 +516,7 @@ from six.moves import urllib
|
||||
from six.moves.http_client import responses, HTTPException
|
||||
from urlgrabber.byterange import range_tuple_normalize, range_tuple_to_header, RangeError
|
||||
|
||||
-from io import StringIO
|
||||
+from io import BytesIO
|
||||
|
||||
try:
|
||||
import xattr
|
||||
@@ -1235,7 +1235,7 @@ default_grabber = URLGrabber()
|
||||
class PyCurlFileObject(object):
|
||||
def __init__(self, url, filename, opts):
|
||||
self.fo = None
|
||||
- self._hdr_dump = ''
|
||||
+ self._hdr_dump = b''
|
||||
self._parsed_hdr = None
|
||||
self.url = url
|
||||
self.scheme = urllib.parse.urlsplit(self.url)[0]
|
||||
@@ -1246,7 +1246,7 @@ class PyCurlFileObject(object):
|
||||
if self.opts.reget == 'check_timestamp':
|
||||
raise NotImplementedError("check_timestamp regets are not implemented in this ver of urlgrabber. Please report this.")
|
||||
self._complete = False
|
||||
- self._rbuf = ''
|
||||
+ self._rbuf = b''
|
||||
self._rbufsize = 1024*8
|
||||
self._ttime = time.time()
|
||||
self._tsize = 0
|
||||
@@ -1298,15 +1298,9 @@ class PyCurlFileObject(object):
|
||||
start = self._range[0] - pos
|
||||
stop = self._range[1] - pos
|
||||
if start < len(buf) and stop > 0:
|
||||
- if not six.PY3 or isinstance(self.fo, StringIO):
|
||||
- self.fo.write(buf[max(start, 0):stop].decode('utf-8'))
|
||||
- else:
|
||||
- self.fo.write(buf[max(start, 0):stop])
|
||||
+ self.fo.write(buf[max(start, 0):stop])
|
||||
else:
|
||||
- if not six.PY3 or isinstance(self.fo, StringIO):
|
||||
- self.fo.write(buf.decode('utf-8'))
|
||||
- else:
|
||||
- self.fo.write(buf)
|
||||
+ self.fo.write(buf)
|
||||
except IOError as e:
|
||||
self._cb_error = URLGrabError(16, exception2msg(e))
|
||||
return -1
|
||||
@@ -1316,7 +1310,7 @@ class PyCurlFileObject(object):
|
||||
|
||||
def _hdr_retrieve(self, buf):
|
||||
if self._hdr_ended:
|
||||
- self._hdr_dump = ''
|
||||
+ self._hdr_dump = b''
|
||||
self.size = 0
|
||||
self._hdr_ended = False
|
||||
|
||||
@@ -1328,12 +1322,12 @@ class PyCurlFileObject(object):
|
||||
# but we can't do that w/o making it do 2 connects, which sucks
|
||||
# so we cheat and stuff it in here in the hdr_retrieve
|
||||
if self.scheme in ['http','https']:
|
||||
- content_length_str = 'content-length:' if not six.PY3 else b'content-length:'
|
||||
+ content_length_str = b'content-length:'
|
||||
if buf.lower().find(content_length_str) != -1:
|
||||
- split_str = ':' if not six.PY3 else b':'
|
||||
+ split_str = b':'
|
||||
length = buf.split(split_str)[1]
|
||||
self.size = int(length)
|
||||
- elif (self.append or self.opts.range) and self._hdr_dump == '' and b' 200 ' in buf:
|
||||
+ elif (self.append or self.opts.range) and self._hdr_dump == b'' and b' 200 ' in buf:
|
||||
# reget was attempted but server sends it all
|
||||
# undo what we did in _build_range()
|
||||
self.append = False
|
||||
@@ -1349,20 +1343,19 @@ class PyCurlFileObject(object):
|
||||
if len(s) >= 14:
|
||||
s = None # ignore MDTM responses
|
||||
elif buf.startswith(b'150 '):
|
||||
- s = parse150(buf if not six.PY3 else buf.decode('utf-8'))
|
||||
+ s = parse150(buf.decode('utf-8')) # Necessary in Python 3, doesn't hurt in Python 2
|
||||
if s:
|
||||
self.size = int(s)
|
||||
|
||||
- location_str = 'location' if not six.PY3 else b'location'
|
||||
+ location_str = b'location'
|
||||
if buf.lower().find(location_str) != -1:
|
||||
- buf_compat = buf if not six.PY3 else buf.decode('utf-8')
|
||||
- location = ':'.join(buf_compat.split(':')[1:])
|
||||
+ location = b':'.join(buf.split(b':')[1:])
|
||||
location = location.strip()
|
||||
self.scheme = urllib.parse.urlsplit(location)[0]
|
||||
self.url = location
|
||||
|
||||
- self._hdr_dump += buf if not six.PY3 else buf.decode('utf-8')
|
||||
- end_str = '\r\n' if not six.PY3 else b'\r\n'
|
||||
+ self._hdr_dump += buf
|
||||
+ end_str = b'\r\n'
|
||||
if len(self._hdr_dump) != 0 and buf == end_str:
|
||||
self._hdr_ended = True
|
||||
if DEBUG: DEBUG.debug('header ended:')
|
||||
@@ -1374,12 +1367,12 @@ class PyCurlFileObject(object):
|
||||
def _return_hdr_obj(self):
|
||||
if self._parsed_hdr:
|
||||
return self._parsed_hdr
|
||||
- statusend = self._hdr_dump.find('\n')
|
||||
+ statusend = self._hdr_dump.find(b'\n')
|
||||
statusend += 1 # ridiculous as it may seem.
|
||||
- hdrfp = StringIO()
|
||||
+ hdrfp = BytesIO()
|
||||
hdrfp.write(self._hdr_dump[statusend:])
|
||||
hdrfp.seek(0)
|
||||
- self._parsed_hdr = Message(hdrfp)
|
||||
+ self._parsed_hdr = Message(hdrfp)
|
||||
return self._parsed_hdr
|
||||
|
||||
hdr = property(_return_hdr_obj)
|
||||
@@ -1709,7 +1702,7 @@ class PyCurlFileObject(object):
|
||||
return (fo, hdr)
|
||||
|
||||
def _do_grab(self):
|
||||
- """dump the file to a filename or StringIO buffer"""
|
||||
+ """dump the file to a filename or BytesIO buffer"""
|
||||
|
||||
if self._complete:
|
||||
return
|
||||
@@ -1739,7 +1732,7 @@ class PyCurlFileObject(object):
|
||||
self._prog_basename = 'MEMORY'
|
||||
|
||||
|
||||
- self.fo = StringIO()
|
||||
+ self.fo = BytesIO()
|
||||
# if this is to be a tempfile instead....
|
||||
# it just makes crap in the tempdir
|
||||
#fh, self._temp_name = mkstemp()
|
||||
@@ -1778,7 +1771,7 @@ class PyCurlFileObject(object):
|
||||
raise err
|
||||
# re open it
|
||||
try:
|
||||
- self.fo = open(self.filename, 'r')
|
||||
+ self.fo = open(self.filename, 'rb')
|
||||
except IOError as e:
|
||||
err = URLGrabError(16, _(\
|
||||
'error opening file from %s, IOError: %s') % (self.url, e))
|
||||
@@ -1853,7 +1846,7 @@ class PyCurlFileObject(object):
|
||||
#if self.opts.progress_obj:
|
||||
# self.opts.progress_obj.update(self._amount_read)
|
||||
|
||||
- self._rbuf = ''.join(buf)
|
||||
+ self._rbuf = b''.join(buf)
|
||||
return
|
||||
|
||||
def _progress_update(self, download_total, downloaded, upload_total, uploaded):
|
||||
@@ -1888,28 +1881,40 @@ class PyCurlFileObject(object):
|
||||
def read(self, amt=None):
|
||||
self._fill_buffer(amt)
|
||||
if amt is None:
|
||||
- s, self._rbuf = self._rbuf, ''
|
||||
+ s, self._rbuf = self._rbuf, b''
|
||||
else:
|
||||
s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
|
||||
- return s
|
||||
+ return s if not six.PY3 else s.decode('utf-8')
|
||||
|
||||
def readline(self, limit=-1):
|
||||
if not self._complete: self._do_grab()
|
||||
- return self.fo.readline()
|
||||
+ return self.fo.readline() if not six.PY3 else self.fo.readline().decode('utf-8')
|
||||
|
||||
- i = self._rbuf.find('\n')
|
||||
+ i = self._rbuf.find(b'\n')
|
||||
while i < 0 and not (0 < limit <= len(self._rbuf)):
|
||||
L = len(self._rbuf)
|
||||
self._fill_buffer(L + self._rbufsize)
|
||||
if not len(self._rbuf) > L: break
|
||||
- i = self._rbuf.find('\n', L)
|
||||
+ i = self._rbuf.find(b'\n', L)
|
||||
|
||||
if i < 0: i = len(self._rbuf)
|
||||
else: i = i+1
|
||||
if 0 <= limit < len(self._rbuf): i = limit
|
||||
|
||||
s, self._rbuf = self._rbuf[:i], self._rbuf[i:]
|
||||
- return s
|
||||
+ return s if not six.PY3 else s.decode('utf-8')
|
||||
+
|
||||
+ # This was added here because we need to wrap self.fo readlines (which will
|
||||
+ # always return bytes) in correct decoding
|
||||
+ def readlines(self, *args, **kwargs):
|
||||
+ if not six.PY3:
|
||||
+ return [line for line in self.fo.readlines(*args, **kwargs)]
|
||||
+ else:
|
||||
+ return self._py3readlines(*args, **kwargs)
|
||||
+
|
||||
+ def _py3readlines(self, *args, **kwargs):
|
||||
+ for line in self.fo.readlines(*args, **kwargs):
|
||||
+ yield line.decode('utf-8')
|
||||
|
||||
def close(self):
|
||||
if self._prog_running:
|
||||
@@ -2055,11 +2060,9 @@ def _readlines(fd):
|
||||
buf = os.read(fd, 4096)
|
||||
if not buf: return None
|
||||
# whole lines only, no buffering
|
||||
- buf_compat = buf if not six.PY3 else buf.decode('utf-8')
|
||||
- while buf_compat[-1] != '\n':
|
||||
+ while buf.decode('utf-8')[-1] != '\n':
|
||||
buf += os.read(fd, 4096)
|
||||
- buf_compat = buf if not six.PY3 else buf.decode('utf-8')
|
||||
- return buf_compat[:-1].split('\n')
|
||||
+ return buf.decode('utf-8')[:-1].split('\n')
|
||||
|
||||
import subprocess
|
||||
|
||||
@@ -2403,7 +2406,7 @@ class _TH:
|
||||
if filename and _TH.dirty is None:
|
||||
try:
|
||||
now = int(time.time())
|
||||
- for line in open(filename):
|
||||
+ for line in open(filename, 'rb'):
|
||||
host, speed, fail, ts = line.rsplit(' ', 3)
|
||||
_TH.hosts[host] = int(speed), int(fail), min(int(ts), now)
|
||||
except IOError: pass
|
||||
@@ -2415,7 +2418,7 @@ class _TH:
|
||||
if filename and _TH.dirty is True:
|
||||
tmp = '%s.%d' % (filename, os.getpid())
|
||||
try:
|
||||
- f = open(tmp, 'w')
|
||||
+ f = open(tmp, 'wb')
|
||||
for host in _TH.hosts:
|
||||
f.write(host + ' %d %d %d\n' % _TH.hosts[host])
|
||||
f.close()
|
||||
@@ -2536,7 +2539,7 @@ def _file_object_test(filename=None):
|
||||
if filename is None:
|
||||
filename = __file__
|
||||
print('using file "%s" for comparisons' % filename)
|
||||
- fo = open(filename)
|
||||
+ fo = open(filename, 'rb')
|
||||
s_input = fo.read()
|
||||
fo.close()
|
||||
|
||||
@@ -2544,8 +2547,8 @@ def _file_object_test(filename=None):
|
||||
_test_file_object_readall,
|
||||
_test_file_object_readline,
|
||||
_test_file_object_readlines]:
|
||||
- fo_input = StringIO(s_input)
|
||||
- fo_output = StringIO()
|
||||
+ fo_input = BytesIO(s_input)
|
||||
+ fo_output = BytesIO()
|
||||
wrapper = PyCurlFileObject(fo_input, None, 0)
|
||||
print('testing %-30s ' % testfunc.__name__, testfunc(wrapper, fo_output))
|
||||
s_output = fo_output.getvalue()
|
Loading…
Reference in new issue