commit 7d6b90e17d333535549e2d3ec1cf41845a9b876f
Author: Tomas Radej <tradej@redhat.com>
Date:   Wed Aug 20 13:32:18 2014 +0200

    Ported main code

diff --git a/urlgrabber/__init__.py b/urlgrabber/__init__.py
index b3047b0..636849c 100644
--- a/urlgrabber/__init__.py
+++ b/urlgrabber/__init__.py
@@ -52,4 +52,4 @@ __author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \
              'Zdenek Pavlas <zpavlas@redhat.com>'
 __url__ = 'http://urlgrabber.baseurl.org/'
 
-from grabber import urlgrab, urlopen, urlread
+from urlgrabber.grabber import urlgrab, urlopen, urlread
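[Porting note] The one-line change above is load-bearing: Python 3 removed the implicit relative imports that Python 2 allowed inside packages (PEP 328), so `from grabber import ...` fails there. A minimal sketch of the two spellings that stay valid on both interpreters:

    # absolute import - valid on Python 2 and 3
    from urlgrabber.grabber import urlgrab, urlopen, urlread

    # explicit relative import - also valid on both (PEP 328)
    from .grabber import urlgrab, urlopen, urlread
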
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 5efa160..ffaed8e 100644
--- a/urlgrabber/byterange.py
+++ b/urlgrabber/byterange.py
@@ -18,24 +18,22 @@
 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
 
 
+import email
+import mimetypes
 import os
+import six
 import stat
-import urllib
-import urllib2
-import rfc822
+from six.moves import urllib
 
 DEBUG = None
 
-try:
-    from cStringIO import StringIO
-except ImportError, msg:
-    from StringIO import StringIO
+from io import StringIO
 
 class RangeError(IOError):
     """Error raised when an unsatisfiable range is requested."""
     pass
 
-class HTTPRangeHandler(urllib2.BaseHandler):
+class HTTPRangeHandler(urllib.request.BaseHandler):
     """Handler that enables HTTP Range headers.
 
     This was extremely simple. The Range header is a HTTP feature to
@@ -120,7 +118,7 @@ class RangeableFileObject:
         in self.fo. This includes methods."""
         if hasattr(self.fo, name):
             return getattr(self.fo, name)
-        raise AttributeError, name
+        raise AttributeError(name)
 
     def tell(self):
         """Return the position within the range.
@@ -211,37 +209,36 @@ class RangeableFileObject:
                 raise RangeError(9, 'Requested Range Not Satisfiable')
             pos+= bufsize
 
-class FileRangeHandler(urllib2.FileHandler):
+class FileRangeHandler(urllib.request.FileHandler):
     """FileHandler subclass that adds Range support.
        This class handles Range headers exactly like an HTTP
        server would.
     """
     def open_local_file(self, req):
-        import mimetypes
-        import mimetools
         host = req.get_host()
         file = req.get_selector()
-        localfile = urllib.url2pathname(file)
+        localfile = urllib.request.url2pathname(file)
         stats = os.stat(localfile)
         size = stats[stat.ST_SIZE]
-        modified = rfc822.formatdate(stats[stat.ST_MTIME])
+        modified = email.utils.formatdate(stats[stat.ST_MTIME])
         mtype = mimetypes.guess_type(file)[0]
         if host:
-            host, port = urllib.splitport(host)
+            host, port = urllib.parse.splitport(host)
             if port or socket.gethostbyname(host) not in self.get_names():
-                raise urllib2.URLError('file not on local host')
+                raise urllib.error.URLError('file not on local host')
        fo = open(localfile,'rb')
        brange = req.headers.get('Range',None)
        brange = range_header_to_tuple(brange)
        assert brange != ()
        if brange:
            (fb,lb) = brange
-           if lb == '': lb = size
+           if lb == '':
+               lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError(9, 'Requested Range Not Satisfiable')
            size = (lb - fb)
            fo = RangeableFileObject(fo, (fb,lb))
-       headers = mimetools.Message(StringIO(
+       headers = email.message.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        return urllib.addinfourl(fo, headers, 'file:'+file)
@@ -254,42 +251,39 @@ class FileRangeHandler(urllib2.FileHandler):
 # follows:
 # -- range support modifications start/end here
 
-from urllib import splitport, splituser, splitpasswd, splitattr, \
-                   unquote, addclosehook, addinfourl
 import ftplib
 import socket
 import sys
-import mimetypes
-import mimetools
+from six.moves.urllib.parse import urlparse, unquote
+
+# Very old functions and classes, undocumented in current Python releases
+if six.PY3:
+    from urllib.request import splitattr
+    from urllib.response import addinfourl
+else:
+    from urllib import splitattr
+    from urllib import addinfourl
 
-class FTPRangeHandler(urllib2.FTPHandler):
+
+class FTPRangeHandler(urllib.request.FTPHandler):
     def ftp_open(self, req):
         host = req.get_host()
         if not host:
-            raise IOError, ('ftp error', 'no host given')
-        host, port = splitport(host)
-        if port is None:
-            port = ftplib.FTP_PORT
-        else:
-            port = int(port)
+            raise IOError('ftp error', 'no host given')
 
-        # username/password handling
-        user, host = splituser(host)
-        if user:
-            user, passwd = splitpasswd(user)
-        else:
-            passwd = None
+        parsed = urlparse(host)
+        port = parsed.port or ftplib.FTP_PORT
+        user = unquote(parsed.username or '')
+        passwd = unquote(parsed.passwd or '')
         host = unquote(host)
-        user = unquote(user or '')
-        passwd = unquote(passwd or '')
 
         try:
             host = socket.gethostbyname(host)
-        except socket.error, msg:
-            raise urllib2.URLError(msg)
+        except socket.error as msg:
+            raise urllib.error.URLError(msg)
         path, attrs = splitattr(req.get_selector())
         dirs = path.split('/')
-        dirs = map(unquote, dirs)
+        dirs = list(map(unquote, dirs))
         dirs, file = dirs[:-1], dirs[-1]
         if dirs and not dirs[0]:
             dirs = dirs[1:]
@@ -336,24 +330,36 @@ class FTPRangeHandler(urllib2.FTPHandler):
             if retrlen is not None and retrlen >= 0:
                 headers += "Content-Length: %d\n" % retrlen
             sf = StringIO(headers)
-            headers = mimetools.Message(sf)
+            headers = email.message.Message(sf)
             return addinfourl(fp, headers, req.get_full_url())
-        except ftplib.all_errors, msg:
-            raise IOError, ('ftp error', msg), sys.exc_info()[2]
+        except ftplib.all_errors as msg:
+            error = IOError('ftp error', msg)
+            six.reraise(error.__class__, error, sys.exc_info()[2])
 
     def connect_ftp(self, user, passwd, host, port, dirs):
         fw = ftpwrapper(user, passwd, host, port, dirs)
         return fw
 
-class ftpwrapper(urllib.ftpwrapper):
+# Very old functions and classes, undocumented in current Python releases
+if six.PY3:
+    from urllib.request import ftpwrapper, addclosehook
+else:
+    from urllib import ftpwrapper, addclosehook
+
+
+class ftpwrapper(ftpwrapper):
     # range support note:
     # this ftpwrapper code is copied directly from
     # urllib. The only enhancement is to add the rest
     # argument and pass it on to ftp.ntransfercmd
     def retrfile(self, file, type, rest=None):
         self.endtransfer()
-        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
-        else: cmd = 'TYPE ' + type; isdir = 0
+        if type in ('d', 'D'):
+            cmd = 'TYPE A'
+            isdir = 1
+        else:
+            cmd = 'TYPE ' + type
+            isdir = 0
         try:
             self.ftp.voidcmd(cmd)
         except ftplib.all_errors:
@@ -364,22 +370,23 @@ class ftpwrapper(urllib.ftpwrapper):
             # Use nlst to see if the file exists at all
             try:
                 self.ftp.nlst(file)
-            except ftplib.error_perm, reason:
-                raise IOError, ('ftp error', reason), sys.exc_info()[2]
+            except ftplib.error_perm as reason:
+                error = IOError('ftp error', reason)
+                six.reraise(error.__class__, error, sys.exc_info()[2])
             # Restore the transfer mode!
             self.ftp.voidcmd(cmd)
             # Try to retrieve as a file
             try:
                 cmd = 'RETR ' + file
                 conn = self.ftp.ntransfercmd(cmd, rest)
-            except ftplib.error_perm, reason:
+            except ftplib.error_perm as reason:
                 if str(reason)[:3] == '501':
                     # workaround for REST not supported error
                     fp, retrlen = self.retrfile(file, type)
                     fp = RangeableFileObject(fp, (rest,''))
                     return (fp, retrlen)
                 elif str(reason)[:3] != '550':
-                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
+                    six.reraise(IOError, ('ftp error', reason), sys.exc_info()[2])
         if not conn:
             # Set transfer mode to ASCII!
             self.ftp.voidcmd('TYPE A')
@@ -458,6 +465,7 @@ def range_tuple_normalize(range_tup):
     # check if range is over the entire file
     if (fb,lb) == (0,''): return None
     # check that the range is valid
-    if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb))
+    if lb != '' and lb < fb:
+        raise RangeError(9, 'Invalid byte range: %s-%s' % (fb, lb))
     return (fb,lb)
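[Porting note] The Python 2 three-argument raise statement (`raise Class, args, traceback`) is a syntax error on Python 3, which is why the hunks above funnel traceback-preserving re-raises through six.reraise. A minimal sketch of the pattern, with a hypothetical do_transfer() standing in for the ftplib calls:

    import sys
    import six

    def fetch():
        try:
            do_transfer()  # hypothetical stand-in for the ftplib calls above
        except Exception as msg:
            error = IOError('ftp error', msg)
            # re-raise the wrapped error with the original traceback attached
            six.reraise(error.__class__, error, sys.exc_info()[2])

Two details in these hunks look fragile: six.reraise expects an exception instance as its second argument (on Python 3 it calls value.with_traceback(tb)), so the one call in retrfile() that passes a bare tuple would likely still break there; and urlparse results expose .password, not .passwd, so `parsed.passwd` looks like an AttributeError waiting to happen.
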
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index f8deeb8..35c091e 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -499,22 +499,24 @@ BANDWIDTH THROTTLING
 
 import os
 import sys
-import urlparse
 import time
+import collections
+import fcntl
+import pycurl
+import select
+import six
+import socket
+import stat
 import string
-import urllib
-import urllib2
-from httplib import responses
-import mimetools
-import thread
 import types
-import stat
-import pycurl
+from email.message import Message
 from ftplib import parse150
-from StringIO import StringIO
-from httplib import HTTPException
-import socket, select, fcntl
-from byterange import range_tuple_normalize, range_tuple_to_header, RangeError
+from six.moves import _thread as thread
+from six.moves import urllib
+from six.moves.http_client import responses, HTTPException
+from urlgrabber.byterange import range_tuple_normalize, range_tuple_to_header, RangeError
+
+from io import StringIO
 
 try:
     import xattr
@@ -535,7 +537,7 @@ except:
 try:
     # this part isn't going to do much - need to talk to gettext
     from i18n import _
-except ImportError, msg:
+except ImportError as msg:
     def _(st): return st
 
 ########################################################################
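[Porting note] Most of this import hunk is mechanical renaming: six.moves papers over the stdlib reshuffle so that one spelling works on both interpreters. Roughly, the mapping relied on here:

    from six.moves import urllib                 # urllib + urllib2 + urlparse -> urllib.request/.parse/.error
    from six.moves import _thread as thread      # thread -> _thread
    from six.moves.http_client import responses, HTTPException   # httplib -> http.client

    urllib.parse.urlparse('http://example.com')  # Py2: urlparse.urlparse
    urllib.request.url2pathname('/tmp/x')        # Py2: urllib.url2pathname
    urllib.error.HTTPError                       # Py2: urllib2.HTTPError

One behavioral change hides in it: io.StringIO (unlike the old cStringIO) accepts only text, which is why later hunks decode bytes before writing to it.
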
@@ -635,6 +637,8 @@ def _(st):
 
 def _to_utf8(obj, errors='replace'):
     '''convert 'unicode' to an encoded utf-8 byte string '''
+    if six.PY3:
+        return obj
     # stolen from yum.i18n
     if isinstance(obj, unicode):
         obj = obj.encode('utf-8', errors)
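[Porting note] With this guard, _to_utf8() becomes a pass-through on Python 3: URLs and mirror strings stay str instead of being encoded to UTF-8 bytes as on Python 2. A sketch of the resulting behavior:

    _to_utf8(u'caf\xe9')   # Py2: 'caf\xc3\xa9' (UTF-8-encoded byte string)
    _to_utf8('café')       # Py3: 'café' (returned unchanged)
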
@@ -791,14 +795,14 @@ class URLParser:
         if opts.prefix:
             url = self.add_prefix(url, opts.prefix)
 
-        parts = urlparse.urlparse(url)
+        parts = urllib.parse.urlparse(url)
         (scheme, host, path, parm, query, frag) = parts
 
-        if not scheme or (len(scheme) == 1 and scheme in string.letters):
+        if not scheme or (len(scheme) == 1 and scheme in string.ascii_letters):
             # if a scheme isn't specified, we guess that it's "file:"
             if url[0] not in '/\\': url = os.path.abspath(url)
-            url = 'file:' + urllib.pathname2url(url)
-            parts = urlparse.urlparse(url)
+            url = 'file:' + urllib.request.pathname2url(url)
+            parts = urllib.parse.urlparse(url)
             quote = 0 # pathname2url quotes, so we won't do it again
 
         if scheme in ['http', 'https']:
@@ -809,7 +813,7 @@ class URLParser:
         if quote:
             parts = self.quote(parts)
 
-        url = urlparse.urlunparse(parts)
+        url = urllib.parse.urlunparse(parts)
         return url, parts
 
     def add_prefix(self, url, prefix):
@@ -833,7 +837,7 @@ class URLParser:
         passing into urlgrabber.
         """
         (scheme, host, path, parm, query, frag) = parts
-        path = urllib.quote(path)
+        path = urllib.parse.quote(path)
         return (scheme, host, path, parm, query, frag)
 
     hexvals = '0123456789ABCDEF'
@@ -850,7 +854,7 @@ class URLParser:
         (scheme, host, path, parm, query, frag) = parts
         if ' ' in path:
             return 1
-        ind = string.find(path, '%')
+        ind = path.find('%')
         if ind > -1:
             while ind > -1:
                 if len(path) < ind+3:
@@ -859,7 +863,7 @@ class URLParser:
                 if code[0] not in self.hexvals or \
                    code[1] not in self.hexvals:
                     return 1
-                ind = string.find(path, '%', ind+1)
+                ind = path.find('%', ind+1)
             return 0
         return 1
 
@@ -879,13 +883,13 @@ class URLGrabberOptions:
     def __getattr__(self, name):
         if self.delegate and hasattr(self.delegate, name):
             return getattr(self.delegate, name)
-        raise AttributeError, name
+        raise AttributeError(name)
 
     def raw_throttle(self):
         """Calculate raw throttle value from throttle and bandwidth
         values.
         """
-        if self.throttle <= 0:
+        if self.throttle is None or self.throttle <= 0:
             return 0
         elif type(self.throttle) == type(0):
             return float(self.throttle)
@@ -937,7 +941,7 @@ class URLGrabberOptions:
     def _set_attributes(self, **kwargs):
         """Update object attributes with those provided in kwargs."""
         self.__dict__.update(kwargs)
-        if kwargs.has_key('range'):
+        if 'range' in kwargs:
             # normalize the supplied range value
             self.range = range_tuple_normalize(self.range)
         if not self.reget in [None, 'simple', 'check_timestamp']:
@@ -1006,7 +1010,7 @@ class URLGrabberOptions:
         return self.format()
 
     def format(self, indent=' '):
-        keys = self.__dict__.keys()
+        keys = list(self.__dict__.keys())
         if self.delegate is not None:
             keys.remove('delegate')
         keys.sort()
@@ -1026,7 +1030,7 @@ def _do_raise(obj):
 def _run_callback(cb, obj):
     if not cb:
         return
-    if callable(cb):
+    if isinstance(cb, collections.Callable):
         return cb(obj)
     cb, arg, karg = cb
     return cb(obj, *arg, **karg)
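[Porting note] callable() was removed in Python 3.0 (and only reinstated in 3.2), which is presumably why the port switches to an isinstance check. The ABC has since moved, though: plain collections.Callable stopped working in Python 3.10. A version-proof sketch:

    try:
        from collections.abc import Callable   # Python 3.3+
    except ImportError:
        from collections import Callable       # Python 2.6+ and early 3.x

    def _is_callable(cb):
        # equivalent to callable(cb) on Python 2.6+ and 3.2+
        return isinstance(cb, Callable)
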
@@ -1058,16 +1062,15 @@ class URLGrabber(object):
             tries = tries + 1
             exception = None
             callback = None
-            if DEBUG: DEBUG.info('attempt %i/%s: %s',
-                                 tries, opts.retry, args[0])
+            if DEBUG: DEBUG.info('attempt %i/%s: %s', tries, opts.retry, args[0])
             try:
-                r = apply(func, (opts,) + args, {})
+                r = func(*(opts,) + args, **{})
                 if DEBUG: DEBUG.info('success')
                 return r
-            except URLGrabError, e:
+            except URLGrabError as e:
                 exception = e
                 callback = opts.failure_callback
-            except KeyboardInterrupt, e:
+            except KeyboardInterrupt as e:
                 exception = e
                 callback = opts.interrupt_callback
                 if not callback:
@@ -1082,13 +1085,13 @@ class URLGrabber(object):
 
             if (opts.retry is None) or (tries == opts.retry):
                 if DEBUG: DEBUG.info('retries exceeded, re-raising')
-                raise
+                raise exception
 
             retrycode = getattr(exception, 'errno', None)
             if (retrycode is not None) and (retrycode not in opts.retrycodes):
                 if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
                                      retrycode, opts.retrycodes)
-                raise
+                raise exception
 
     def urlopen(self, url, opts=None, **kwargs):
         """open the url and return a file object
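[Porting note] The change from a bare `raise` to `raise exception` matters on Python 3: sys.exc_info() is cleared once an except block exits, so by the time the retry loop decides to give up (after running the failure callback), a bare `raise` would die with "RuntimeError: No active exception to reraise". Keeping the instance and re-raising it explicitly works on both interpreters. Sketch of the pattern:

    def retry_loop(func, retries, on_failure=None):
        for attempt in range(1, retries + 1):
            try:
                return func()
            except Exception as e:      # urlgrabber catches URLGrabError here
                exception = e
            if on_failure:
                on_failure(exception)   # handler has exited; bare raise is invalid on Py3
            if attempt == retries:
                raise exception         # Py3 instances carry __traceback__; Py2 loses it
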
@@ -1119,14 +1122,14 @@ class URLGrabber(object):
         (scheme, host, path, parm, query, frag) = parts
         opts.find_proxy(url, scheme)
         if filename is None:
-            filename = os.path.basename( urllib.unquote(path) )
+            filename = os.path.basename( urllib.parse.unquote(path) )
             if not filename:
                 # This is better than nothing.
                 filename = 'index.html'
         if scheme == 'file' and not opts.copy_local:
             # just return the name of the local file - don't make a
             # copy currently
-            path = urllib.url2pathname(path)
+            path = urllib.request.url2pathname(path)
             if host:
                 path = os.path.normpath('//' + host + path)
             if not os.path.exists(path):
@@ -1170,7 +1173,7 @@ class URLGrabber(object):
 
         try:
             return self._retry(opts, retryfunc, url, filename)
-        except URLGrabError, e:
+        except URLGrabError as e:
             _TH.update(url, 0, 0, e)
             opts.exception = e
             return _run_callback(opts.failfunc, opts)
@@ -1219,7 +1222,7 @@ class URLGrabber(object):
 
     def _make_callback(self, callback_obj):
         # not used, left for compatibility
-        if callable(callback_obj):
+        if isinstance(callback_obj, collections.Callable):
             return callback_obj, (), {}
         else:
             return callback_obj
@@ -1235,13 +1238,13 @@ class PyCurlFileObject(object):
         self._hdr_dump = ''
         self._parsed_hdr = None
         self.url = url
-        self.scheme = urlparse.urlsplit(self.url)[0]
+        self.scheme = urllib.parse.urlsplit(self.url)[0]
         self.filename = filename
         self.append = False
         self.reget_time = None
         self.opts = opts
         if self.opts.reget == 'check_timestamp':
-            raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this."
+            raise NotImplementedError("check_timestamp regets are not implemented in this ver of urlgrabber. Please report this.")
         self._complete = False
         self._rbuf = ''
         self._rbufsize = 1024*8
@@ -1266,7 +1269,7 @@ class PyCurlFileObject(object):
 
         if hasattr(self.fo, name):
             return getattr(self.fo, name)
-        raise AttributeError, name
+        raise AttributeError(name)
 
     def _retrieve(self, buf):
         try:
@@ -1280,7 +1283,7 @@ class PyCurlFileObject(object):
                 if self.opts.progress_obj:
                     size = self.size + self._reget_length
                     self.opts.progress_obj.start(self._prog_reportname,
-                                                 urllib.unquote(self.url),
+                                                 urllib.parse.unquote(self.url),
                                                  self._prog_basename,
                                                  size=size,
                                                  text=self.opts.text)
@@ -1295,10 +1298,16 @@ class PyCurlFileObject(object):
                     start = self._range[0] - pos
                     stop = self._range[1] - pos
                     if start < len(buf) and stop > 0:
-                        self.fo.write(buf[max(start, 0):stop])
+                        if not six.PY3 or isinstance(self.fo, StringIO):
+                            self.fo.write(buf[max(start, 0):stop].decode('utf-8'))
+                        else:
+                            self.fo.write(buf[max(start, 0):stop])
                 else:
-                    self.fo.write(buf)
-        except IOError, e:
+                    if not six.PY3 or isinstance(self.fo, StringIO):
+                        self.fo.write(buf.decode('utf-8'))
+                    else:
+                        self.fo.write(buf)
+        except IOError as e:
             self._cb_error = URLGrabError(16, exception2msg(e))
             return -1
         return len(buf)
@@ -1319,10 +1328,12 @@ class PyCurlFileObject(object):
             # but we can't do that w/o making it do 2 connects, which sucks
             # so we cheat and stuff it in here in the hdr_retrieve
             if self.scheme in ['http','https']:
-                if buf.lower().find('content-length:') != -1:
-                    length = buf.split(':')[1]
+                content_length_str = 'content-length:' if not six.PY3 else b'content-length:'
+                if buf.lower().find(content_length_str) != -1:
+                    split_str = ':' if not six.PY3 else b':'
+                    length = buf.split(split_str)[1]
                     self.size = int(length)
-                elif (self.append or self.opts.range) and self._hdr_dump == '' and ' 200 ' in buf:
+                elif (self.append or self.opts.range) and self._hdr_dump == '' and b' 200 ' in buf:
                     # reget was attempted but server sends it all
                     # undo what we did in _build_range()
                     self.append = False
@@ -1333,23 +1344,26 @@ class PyCurlFileObject(object):
                     self.fo.truncate(0)
             elif self.scheme in ['ftp']:
                 s = None
-                if buf.startswith('213 '):
+                if buf.startswith(b'213 '):
                     s = buf[3:].strip()
                     if len(s) >= 14:
                         s = None # ignore MDTM responses
-                elif buf.startswith('150 '):
-                    s = parse150(buf)
+                elif buf.startswith(b'150 '):
+                    s = parse150(buf if not six.PY3 else buf.decode('utf-8'))
                 if s:
                     self.size = int(s)
 
-            if buf.lower().find('location') != -1:
-                location = ':'.join(buf.split(':')[1:])
+            location_str = 'location' if not six.PY3 else b'location'
+            if buf.lower().find(location_str) != -1:
+                buf_compat = buf if not six.PY3 else buf.decode('utf-8')
+                location = ':'.join(buf_compat.split(':')[1:])
                 location = location.strip()
-                self.scheme = urlparse.urlsplit(location)[0]
+                self.scheme = urllib.parse.urlsplit(location)[0]
                 self.url = location
 
-            self._hdr_dump += buf
-            if len(self._hdr_dump) != 0 and buf == '\r\n':
+            self._hdr_dump += buf if not six.PY3 else buf.decode('utf-8')
+            end_str = '\r\n' if not six.PY3 else b'\r\n'
+            if len(self._hdr_dump) != 0 and buf == end_str:
                 self._hdr_ended = True
                 if DEBUG: DEBUG.debug('header ended:')
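[Porting note] Under Python 3 libcurl delivers header data to this callback as bytes, so every literal it is compared against has to be bytes too, and anything destined for text fields (_hdr_dump, parse150) needs an explicit decode. A trimmed sketch of the callback shape used above:

    import six

    def _hdr_retrieve(self, buf):
        # buf is bytes on Python 3, str on Python 2
        needle = b'content-length:' if six.PY3 else 'content-length:'
        if needle in buf.lower():
            sep = b':' if six.PY3 else ':'
            self.size = int(buf.split(sep)[1])
        text = buf.decode('utf-8') if six.PY3 else buf
        self._hdr_dump += text

Decoding raw HTTP headers as UTF-8 is what the port does; strictly, header bytes are nominally latin-1, so this is a simplification that holds for ASCII headers.
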
@@ -1365,7 +1379,7 @@ class PyCurlFileObject(object):
         hdrfp = StringIO()
         hdrfp.write(self._hdr_dump[statusend:])
         hdrfp.seek(0)
-        self._parsed_hdr = mimetools.Message(hdrfp)
+        self._parsed_hdr = Message(hdrfp)
         return self._parsed_hdr
 
     hdr = property(_return_hdr_obj)
@@ -1490,7 +1504,7 @@ class PyCurlFileObject(object):
 
         try:
             self.curl_obj.perform()
-        except pycurl.error, e:
+        except pycurl.error as e:
             # XXX - break some of these out a bit more clearly
             # to other URLGrabErrors from
             # http://curl.haxx.se/libcurl/c/libcurl-errors.html
@@ -1498,7 +1512,7 @@ class PyCurlFileObject(object):
 
             code = self.http_code
             errcode = e.args[0]
-            errurl = urllib.unquote(self.url)
+            errurl = urllib.parse.unquote(self.url)
 
             if self._error[0]:
                 errcode = self._error[0]
@@ -1588,7 +1602,7 @@ class PyCurlFileObject(object):
             if self._error[1]:
                 msg = self._error[1]
                 err = URLGrabError(14, msg)
-                err.url = urllib.unquote(self.url)
+                err.url = urllib.parse.unquote(self.url)
                 raise err
 
     def _do_open(self):
@@ -1605,7 +1619,7 @@ class PyCurlFileObject(object):
     def _build_range(self):
         reget_length = 0
         rt = None
-        if self.opts.reget and type(self.filename) in types.StringTypes:
+        if self.opts.reget and type(self.filename) in (type(str()), six.text_type):
             # we have reget turned on and we're dumping to a file
             try:
                 s = os.stat(self.filename)
@@ -1655,22 +1669,22 @@ class PyCurlFileObject(object):
             else:
                 fo = opener.open(req)
             hdr = fo.info()
-        except ValueError, e:
+        except ValueError as e:
             err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, ))
             err.url = self.url
             raise err
 
-        except RangeError, e:
+        except RangeError as e:
             err = URLGrabError(9, _('%s on %s') % (e, self.url))
             err.url = self.url
             raise err
-        except urllib2.HTTPError, e:
+        except urllib.error.HTTPError as e:
             new_e = URLGrabError(14, _('%s on %s') % (e, self.url))
             new_e.code = e.code
             new_e.exception = e
             new_e.url = self.url
             raise new_e
-        except IOError, e:
+        except IOError as e:
             if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout):
                 err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
                 err.url = self.url
@@ -1680,12 +1694,12 @@ class PyCurlFileObject(object):
                 err.url = self.url
                 raise err
 
-        except OSError, e:
+        except OSError as e:
             err = URLGrabError(5, _('%s on %s') % (e, self.url))
             err.url = self.url
             raise err
 
-        except HTTPException, e:
+        except HTTPException as e:
             err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \
                                (e.__class__.__name__, self.url, e))
             err.url = self.url
@@ -1700,19 +1714,21 @@ class PyCurlFileObject(object):
         if self._complete:
             return
         _was_filename = False
-        if type(self.filename) in types.StringTypes and self.filename:
+        if self.filename and type(self.filename) in (type(str()), six.text_type):
             _was_filename = True
             self._prog_reportname = str(self.filename)
             self._prog_basename = os.path.basename(self.filename)
 
-            if self.append: mode = 'ab'
-            else: mode = 'wb'
+            if self.append:
+                mode = 'ab'
+            else:
+                mode = 'wb'
 
-            if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \
-                                 (self.filename, mode))
+            if DEBUG:
+                DEBUG.info('opening local file "%s" with mode %s' % (self.filename, mode))
             try:
                 self.fo = open(self.filename, mode)
-            except IOError, e:
+            except IOError as e:
                 err = URLGrabError(16, _(\
                     'error opening local file from %s, IOError: %s') % (self.url, e))
                 err.url = self.url
@@ -1731,7 +1747,7 @@ class PyCurlFileObject(object):
 
         try:
             self._do_perform()
-        except URLGrabError, e:
+        except URLGrabError as e:
             self.fo.flush()
             self.fo.close()
             raise e
@@ -1754,7 +1770,7 @@ class PyCurlFileObject(object):
             if mod_time != -1:
                 try:
                     os.utime(self.filename, (mod_time, mod_time))
-                except OSError, e:
+                except OSError as e:
                     err = URLGrabError(16, _(\
                         'error setting timestamp on file %s from %s, OSError: %s')
                               % (self.filename, self.url, e))
@@ -1763,7 +1779,7 @@ class PyCurlFileObject(object):
             # re open it
             try:
                 self.fo = open(self.filename, 'r')
-            except IOError, e:
+            except IOError as e:
                 err = URLGrabError(16, _(\
                     'error opening file from %s, IOError: %s') % (self.url, e))
                 err.url = self.url
@@ -1809,25 +1825,27 @@ class PyCurlFileObject(object):
             else: readamount = min(amt, self._rbufsize)
             try:
                 new = self.fo.read(readamount)
-            except socket.error, e:
+            except socket.error as e:
                 err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e))
                 err.url = self.url
                 raise err
 
-            except socket.timeout, e:
+            except socket.timeout as e:
                 raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
                 err.url = self.url
                 raise err
 
-            except IOError, e:
+            except IOError as e:
                 raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e))
                 err.url = self.url
                 raise err
 
             newsize = len(new)
-            if not newsize: break # no more to read
+            if not newsize:
+                break # no more to read
 
-            if amt: amt = amt - newsize
+            if amt:
+                amt = amt - newsize
             buf.append(new)
             bufsize = bufsize + newsize
             self._tsize = newsize
@@ -1835,7 +1853,7 @@ class PyCurlFileObject(object):
         #if self.opts.progress_obj:
        #    self.opts.progress_obj.update(self._amount_read)
 
-        self._rbuf = string.join(buf, '')
+        self._rbuf = ''.join(buf)
         return
 
     def _progress_update(self, download_total, downloaded, upload_total, uploaded):
@@ -1879,12 +1897,12 @@ class PyCurlFileObject(object):
             if not self._complete: self._do_grab()
             return self.fo.readline()
 
-        i = string.find(self._rbuf, '\n')
+        i = self._rbuf.find('\n')
         while i < 0 and not (0 < limit <= len(self._rbuf)):
             L = len(self._rbuf)
             self._fill_buffer(L + self._rbufsize)
             if not len(self._rbuf) > L: break
-            i = string.find(self._rbuf, '\n', L)
+            i = self._rbuf.find('\n', L)
 
         if i < 0: i = len(self._rbuf)
         else: i = i+1
@@ -1968,9 +1986,9 @@ def _dumps(v):
     if v is None: return 'None'
     if v is True: return 'True'
     if v is False: return 'False'
-    if type(v) in (int, long, float):
+    if type(v) in six.integer_types + (float,):
         return str(v)
-    if type(v) == unicode:
+    if not six.PY3 and type(v) == unicode:
         v = v.encode('UTF8')
     if type(v) == str:
         def quoter(c): return _quoter_map.get(c, c)
@@ -1979,17 +1997,21 @@ def _dumps(v):
         return "(%s)" % ','.join(map(_dumps, v))
     if type(v) == list:
         return "[%s]" % ','.join(map(_dumps, v))
-    raise TypeError, 'Can\'t serialize %s' % v
+    raise TypeError('Can\'t serialize %s' % v)
 
 def _loads(s):
     def decode(v):
         if v == 'None': return None
         if v == 'True': return True
         if v == 'False': return False
-        try: return int(v)
-        except ValueError: pass
-        try: return float(v)
-        except ValueError: pass
+        try:
+            return int(v)
+        except ValueError:
+            pass
+        try:
+            return float(v)
+        except ValueError:
+            pass
         if len(v) >= 2 and v[0] == v[-1] == "'":
             ret = []; i = 1
             while True:
@@ -2033,9 +2055,11 @@ def _readlines(fd):
     buf = os.read(fd, 4096)
     if not buf: return None
     # whole lines only, no buffering
-    while buf[-1] != '\n':
+    buf_compat = buf if not six.PY3 else buf.decode('utf-8')
+    while buf_compat[-1] != '\n':
         buf += os.read(fd, 4096)
-    return buf[:-1].split('\n')
+        buf_compat = buf if not six.PY3 else buf.decode('utf-8')
+    return buf_compat[:-1].split('\n')
 
 import subprocess
 
@@ -2071,7 +2095,8 @@ class _ExternalDownloader:
         arg = []
         for k in self._options:
             v = getattr(opts, k)
-            if v is None: continue
+            if v is None:
+                continue
             arg.append('%s=%s' % (k, _dumps(v)))
         if opts.progress_obj and opts.multi_progress_obj:
             arg.append('progress_obj=True')
@@ -2080,7 +2105,8 @@ class _ExternalDownloader:
 
         self.cnt += 1
         self.running[self.cnt] = opts
-        os.write(self.stdin, arg +'\n')
+        result = arg +'\n'
+        os.write(self.stdin, result if not six.PY3 else result.encode('utf-8'))
 
     def perform(self):
         ret = []
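[Porting note] os.write() accepts only bytes on Python 3, hence the encode before writing to the external downloader's pipe. A minimal sketch of the pattern:

    import os
    import six

    def send_line(fd, line):
        data = line + '\n'
        if six.PY3:
            data = data.encode('utf-8')  # os.write(fd, str) raises TypeError on Py3
        os.write(fd, data)
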
@@ -2091,7 +2117,7 @@ class _ExternalDownloader:
         for line in lines:
             # parse downloader output
             line = line.split(' ', 6)
-            _id, size = map(int, line[:2])
+            _id, size = list(map(int, line[:2]))
             if len(line) == 2:
                 self.running[_id]._progress.update(size)
                 continue
@@ -2121,7 +2147,7 @@ class _ExternalDownloaderPool:
         self.cache = {}
 
     def start(self, opts):
-        host = urlparse.urlsplit(opts.url).netloc
+        host = urllib.parse.urlsplit(opts.url).netloc
         dl = self.cache.pop(host, None)
         if not dl:
             dl = _ExternalDownloader()
@@ -2144,8 +2170,9 @@ class _ExternalDownloaderPool:
             ret.extend(done)
 
             # dl finished, move it to the cache
-            host = urlparse.urlsplit(done[0][0].url).netloc
-            if host in self.cache: self.cache[host].abort()
+            host = urllib.parse.urlsplit(done[0][0].url).netloc
+            if host in self.cache:
+                self.cache[host].abort()
             self.epoll.unregister(fd)
             self.cache[host] = self.running.pop(fd)
         return ret
@@ -2189,7 +2216,7 @@ def parallel_wait(meter=None):
         opts.tries = tries
         try:
             dl.start(opts)
-        except OSError, e:
+        except OSError as e:
             # can't spawn downloader, give up immediately
             opts.exception = URLGrabError(5, exception2msg(e))
             _run_callback(opts.failfunc, opts)
@@ -2212,7 +2239,8 @@ def parallel_wait(meter=None):
             if ug_err is None:
                 if opts.checkfunc:
                     try: _run_callback(opts.checkfunc, opts)
-                    except URLGrabError, ug_err: pass
+                    except URLGrabError:
+                        pass
 
                 if opts.progress_obj:
                     if opts.multi_progress_obj:
@@ -2242,8 +2270,9 @@ def parallel_wait(meter=None):
             retry = opts.retry or 0
             if opts.failure_callback:
                 opts.exception = ug_err
-                try: _run_callback(opts.failure_callback, opts)
-                except URLGrabError, ug_err:
+                try:
+                    _run_callback(opts.failure_callback, opts)
+                except URLGrabError:
                     retry = 0 # no retries
             if opts.tries < retry and ug_err.errno in opts.retrycodes:
                 start(opts, opts.tries + 1) # simple retry
@@ -2293,8 +2322,7 @@ def parallel_wait(meter=None):
                 # check global limit
                 while len(dl.running) >= default_grabber.opts.max_connections:
                     perform()
-                if DEBUG:
-                    DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections)
+                if DEBUG: DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections)
 
                 if opts.mirror_group:
                     mg, errors, failed, removed = opts.mirror_group
@@ -2345,12 +2373,12 @@ def parallel_wait(meter=None):
                         limit = 1
                     while host_con.get(key, 0) >= (limit or 2):
                         perform()
-                    if DEBUG:
-                        DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit)
+                    if DEBUG: DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit)
 
                 start(opts, 1)
-    except IOError, e:
-        if e.errno != 4: raise
+    except IOError as e:
+        if e.errno != 4:
+            raise
         raise KeyboardInterrupt
 
     finally:
@@ -2399,7 +2427,7 @@ class _TH:
     def update(url, dl_size, dl_time, ug_err, baseurl=None):
         # Use hostname from URL. If it's a file:// URL, use baseurl.
         # If no baseurl, do not update timedhosts.
-        host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
+        host = urllib.parse.urlsplit(url).netloc.split('@')[-1] or baseurl
         if not host: return
 
         _TH.load()
@@ -2431,7 +2459,7 @@ class _TH:
         _TH.load()
 
         # Use just the hostname, unless it's a file:// baseurl.
-        host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
+        host = urllib.parse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
 
         default_speed = default_grabber.opts.default_speed
         try: speed, fail, ts = _TH.hosts[host]
@@ -2447,68 +2475,67 @@ class _TH:
 def _main_test():
     try: url, filename = sys.argv[1:3]
     except ValueError:
-        print 'usage:', sys.argv[0], \
-              '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
+        print('usage:', sys.argv[0], \
+              '<url> <filename> [copy_local=0|1] [close_connection=0|1]')
         sys.exit()
 
     kwargs = {}
     for a in sys.argv[3:]:
-        k, v = string.split(a, '=', 1)
+        k, v = a.split('=', 1)
         kwargs[k] = int(v)
 
     set_throttle(1.0)
     set_bandwidth(32 * 1024)
-    print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
-                                                        default_grabber.bandwidth)
+    print("throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
+                                                        default_grabber.bandwidth))
 
-    try: from progress import text_progress_meter
-    except ImportError, e: pass
+    try: from .progress import text_progress_meter
+    except ImportError: pass
     else: kwargs['progress_obj'] = text_progress_meter()
 
-    try: name = apply(urlgrab, (url, filename), kwargs)
-    except URLGrabError, e: print e
-    else: print 'LOCAL FILE:', name
+    try: name = urlgrab(*(url, filename), **kwargs)
+    except URLGrabError as e: print(e)
+    else: print('LOCAL FILE:', name)
 
 
 def _retry_test():
     try: url, filename = sys.argv[1:3]
     except ValueError:
-        print 'usage:', sys.argv[0], \
-              '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
+        print('usage:', sys.argv[0], \
+              '<url> <filename> [copy_local=0|1] [close_connection=0|1]')
         sys.exit()
 
     kwargs = {}
     for a in sys.argv[3:]:
-        k, v = string.split(a, '=', 1)
+        k, v = a.split('=', 1)
         kwargs[k] = int(v)
 
-    try: from progress import text_progress_meter
-    except ImportError, e: pass
+    try: from .progress import text_progress_meter
+    except ImportError: pass
    else: kwargs['progress_obj'] = text_progress_meter()
 
     def cfunc(filename, hello, there='foo'):
-        print hello, there
+        print(hello, there)
         import random
         rnum = random.random()
         if rnum < .5:
-            print 'forcing retry'
+            print('forcing retry')
             raise URLGrabError(-1, 'forcing retry')
         if rnum < .75:
-            print 'forcing failure'
+            print('forcing failure')
             raise URLGrabError(-2, 'forcing immediate failure')
-        print 'success'
+        print('success')
         return
 
     kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
-    try: name = apply(retrygrab, (url, filename), kwargs)
-    except URLGrabError, e: print e
-    else: print 'LOCAL FILE:', name
+    try: name = retrygrab(*(url, filename), **kwargs)
+    except URLGrabError as e: print(e)
+    else: print('LOCAL FILE:', name)
 
 def _file_object_test(filename=None):
-    import cStringIO
     if filename is None:
         filename = __file__
-    print 'using file "%s" for comparisons' % filename
+    print('using file "%s" for comparisons' % filename)
     fo = open(filename)
     s_input = fo.read()
     fo.close()
@@ -2517,14 +2544,13 @@ def _file_object_test(filename=None):
                      _test_file_object_readall,
                      _test_file_object_readline,
                      _test_file_object_readlines]:
-        fo_input = cStringIO.StringIO(s_input)
-        fo_output = cStringIO.StringIO()
+        fo_input = StringIO(s_input)
+        fo_output = StringIO()
         wrapper = PyCurlFileObject(fo_input, None, 0)
-        print 'testing %-30s ' % testfunc.__name__,
-        testfunc(wrapper, fo_output)
+        print('testing %-30s ' % testfunc.__name__, testfunc(wrapper, fo_output))
         s_output = fo_output.getvalue()
-        if s_output == s_input: print 'passed'
-        else: print 'FAILED'
+        if s_output == s_input: print('passed')
+        else: print('FAILED')
 
 def _test_file_object_smallread(wrapper, fo_output):
     while 1:
@@ -2544,7 +2570,7 @@ def _test_file_object_readline(wrapper, fo_output):
 
 def _test_file_object_readlines(wrapper, fo_output):
     li = wrapper.readlines()
-    fo_output.write(string.join(li, ''))
+    fo_output.write(''.join(li))
 
 if __name__ == '__main__':
     _main_test()
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index 988a309..f3c2664 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -92,13 +92,14 @@ CUSTOMIZATION
 
 
 import sys
+import six
 import random
-import thread  # needed for locking to make this threadsafe
+from six.moves import _thread as thread  # needed for locking to make this threadsafe
 
-from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
-from grabber import _run_callback, _do_raise
-from grabber import exception2msg
-from grabber import _TH
+from urlgrabber.grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
+from urlgrabber.grabber import _run_callback, _do_raise
+from urlgrabber.grabber import exception2msg
+from urlgrabber.grabber import _TH
 
 def _(st):
     return st
@@ -286,7 +287,7 @@ class MirrorGroup:
     def _parse_mirrors(self, mirrors):
         parsed_mirrors = []
         for m in mirrors:
-            if isinstance(m, basestring):
+            if isinstance(m, six.string_types):
                 m = {'mirror': _to_utf8(m)}
             parsed_mirrors.append(m)
         return parsed_mirrors
@@ -423,7 +424,7 @@ class MirrorGroup:
         if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
         try:
             return func_ref( *(fullurl,), opts=opts, **kw )
-        except URLGrabError, e:
+        except URLGrabError as e:
             if DEBUG: DEBUG.info('MIRROR: failed')
             gr.errors.append((fullurl, exception2msg(e)))
             obj = CallbackObject()
@@ -446,7 +447,7 @@ class MirrorGroup:
         func = 'urlgrab'
         try:
             return self._mirror_try(func, url, kw)
-        except URLGrabError, e:
+        except URLGrabError as e:
             obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs)
             return _run_callback(kwargs.get('failfunc', _do_raise), obj)
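[Porting note] basestring no longer exists on Python 3; six.string_types is (basestring,) on Python 2 and (str,) on Python 3, so the isinstance check above accepts the same inputs on both. Sketch:

    import six

    def normalize_mirror(m):
        # accept either a bare URL string or an already-parsed dict
        if isinstance(m, six.string_types):
            m = {'mirror': m}
        return m

    normalize_mirror('http://mirror.example.com/repo')  # -> {'mirror': '...'}
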
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
index 9b77c54..2235397 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -19,9 +19,10 @@
 
 
 import sys
+import six
 import time
 import math
-import thread
+from six.moves import _thread as thread
 import fcntl
 import struct
 import termios
@@ -606,7 +607,7 @@ class TextMultiFileMeter(MultiFileMeter):
         try:
             format = "%-30.30s %6.6s %s"
             fn = meter.text or meter.basename
-            if type(message) in (type(''), type(u'')):
+            if type(message) in (type(''), type('')):
                 message = message.splitlines()
             if not message: message = ['']
             out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
@@ -778,7 +779,7 @@ def format_number(number, SI=0, space=' '):
         depth = depth + 1
         number = number / step
 
-    if type(number) == type(1) or type(number) == type(1L):
+    if type(number) in six.integer_types:
         # it's an int or a long, which means it didn't get divided,
         # which means it's already short enough
         format = '%i%s%s'
@@ -806,7 +807,7 @@ def _tst(fn, cur, tot, beg, size, *args):
     tm.end(size)
 
 def _mtst(datas, *args):
-    print '-' * 79
+    print('-' * 79)
     tm = TextMultiFileMeter(threaded=False)
 
     dl_sizes = {}
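[Porting note] The format_number() change is the same integer-type trick used in grabber.py: the long type is gone in Python 3, and six.integer_types hides the difference. Sketch:

    import six

    def is_integral(number):
        # six.integer_types: (int, long) on Python 2, (int,) on Python 3
        return type(number) in six.integer_types

    is_integral(5)    # True
    is_integral(5.0)  # False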