parent
93f8077e86
commit
c7b47beb48
@ -1 +1 @@
|
|||||||
urlgrabber-2.9.9.tar.gz
|
urlgrabber-3.0.0.tar.gz
|
||||||
|
@ -1 +1 @@
|
|||||||
a1001edcc026de2848714b81e5bdb939 urlgrabber-2.9.9.tar.gz
|
3cdb34db3269baf8006da35b9f82d9c9 urlgrabber-3.0.0.tar.gz
|
||||||
|
@ -1,11 +0,0 @@
|
|||||||
--- urlgrabber/grabber.py~ 2006-02-22 13:26:46.000000000 -0500
|
|
||||||
+++ urlgrabber/grabber.py 2006-03-14 14:17:59.000000000 -0500
|
|
||||||
@@ -1140,6 +1140,8 @@
|
|
||||||
raise URLGrabError(4, _('Socket Error: %s') % (e, ))
|
|
||||||
except TimeoutError, e:
|
|
||||||
raise URLGrabError(12, _('Timeout: %s') % (e, ))
|
|
||||||
+ except IOError, e:
|
|
||||||
+ raise URLGrabError(4, _('IOError: %s') %(e,))
|
|
||||||
newsize = len(new)
|
|
||||||
if not newsize: break # no more to read
|
|
||||||
|
|
@ -1,157 +0,0 @@
|
|||||||
Index: urlgrabber/byterange.py
|
|
||||||
===================================================================
|
|
||||||
RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/byterange.py,v
|
|
||||||
retrieving revision 1.11
|
|
||||||
diff -u -r1.11 byterange.py
|
|
||||||
--- urlgrabber/byterange.py 22 Oct 2005 21:57:28 -0000 1.11
|
|
||||||
+++ urlgrabber/byterange.py 13 Jul 2006 19:48:26 -0000
|
|
||||||
@@ -71,6 +71,15 @@
|
|
||||||
# HTTP's Range Not Satisfiable error
|
|
||||||
raise RangeError('Requested Range Not Satisfiable')
|
|
||||||
|
|
||||||
+class HTTPSRangeHandler(HTTPRangeHandler):
|
|
||||||
+ """ Range Header support for HTTPS. """
|
|
||||||
+
|
|
||||||
+ def https_error_206(self, req, fp, code, msg, hdrs):
|
|
||||||
+ return self.http_error_206(req, fp, code, msg, hdrs)
|
|
||||||
+
|
|
||||||
+ def https_error_416(self, req, fp, code, msg, hdrs):
|
|
||||||
+ self.https_error_416(req, fp, code, msg, hdrs)
|
|
||||||
+
|
|
||||||
class RangeableFileObject:
|
|
||||||
"""File object wrapper to enable raw range handling.
|
|
||||||
This was implemented primarilary for handling range
|
|
||||||
Index: urlgrabber/grabber.py
|
|
||||||
===================================================================
|
|
||||||
RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/grabber.py,v
|
|
||||||
retrieving revision 1.46
|
|
||||||
diff -u -r1.46 grabber.py
|
|
||||||
--- urlgrabber/grabber.py 22 Mar 2006 20:09:33 -0000 1.46
|
|
||||||
+++ urlgrabber/grabber.py 13 Jul 2006 19:48:26 -0000
|
|
||||||
@@ -402,24 +402,25 @@
|
|
||||||
# This is a convenient way to make keepalive optional.
|
|
||||||
# Just rename the module so it can't be imported.
|
|
||||||
import keepalive
|
|
||||||
- from keepalive import HTTPHandler
|
|
||||||
+ from keepalive import HTTPHandler, HTTPSHandler
|
|
||||||
except ImportError, msg:
|
|
||||||
- keepalive_handler = None
|
|
||||||
+ keepalive_handlers = ()
|
|
||||||
else:
|
|
||||||
- keepalive_handler = HTTPHandler()
|
|
||||||
+ keepalive_handlers = (HTTPHandler(), HTTPSHandler())
|
|
||||||
|
|
||||||
try:
|
|
||||||
# add in range support conditionally too
|
|
||||||
import byterange
|
|
||||||
- from byterange import HTTPRangeHandler, FileRangeHandler, \
|
|
||||||
- FTPRangeHandler, range_tuple_normalize, range_tuple_to_header, \
|
|
||||||
- RangeError
|
|
||||||
+ from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
|
|
||||||
+ FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
|
|
||||||
+ range_tuple_to_header, RangeError
|
|
||||||
except ImportError, msg:
|
|
||||||
range_handlers = ()
|
|
||||||
RangeError = None
|
|
||||||
have_range = 0
|
|
||||||
else:
|
|
||||||
- range_handlers = (HTTPRangeHandler(), FileRangeHandler(), FTPRangeHandler())
|
|
||||||
+ range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
|
|
||||||
+ FileRangeHandler(), FTPRangeHandler())
|
|
||||||
have_range = 1
|
|
||||||
|
|
||||||
|
|
||||||
@@ -454,7 +455,7 @@
|
|
||||||
|
|
||||||
global DEBUG
|
|
||||||
DEBUG = DBOBJ
|
|
||||||
- if keepalive_handler and keepalive.DEBUG is None:
|
|
||||||
+ if keepalive_handlers and keepalive.DEBUG is None:
|
|
||||||
keepalive.DEBUG = DBOBJ
|
|
||||||
if have_range and byterange.DEBUG is None:
|
|
||||||
byterange.DEBUG = DBOBJ
|
|
||||||
@@ -582,7 +583,8 @@
|
|
||||||
|
|
||||||
def close_all():
|
|
||||||
"""close any open keepalive connections"""
|
|
||||||
- if keepalive_handler: keepalive_handler.close_all()
|
|
||||||
+ for handler in keepalive_handlers:
|
|
||||||
+ handler.close_all()
|
|
||||||
|
|
||||||
def urlgrab(url, filename=None, **kwargs):
|
|
||||||
"""grab the file at <url> and make a local copy at <filename>
|
|
||||||
@@ -1012,7 +1014,7 @@
|
|
||||||
return self.opts.opener
|
|
||||||
elif self._opener is None:
|
|
||||||
handlers = []
|
|
||||||
- need_keepalive_handler = (keepalive_handler and self.opts.keepalive)
|
|
||||||
+ need_keepalive_handler = (keepalive_handlers and self.opts.keepalive)
|
|
||||||
need_range_handler = (range_handlers and \
|
|
||||||
(self.opts.range or self.opts.reget))
|
|
||||||
# if you specify a ProxyHandler when creating the opener
|
|
||||||
@@ -1043,7 +1045,7 @@
|
|
||||||
# -------------------------------------------------------
|
|
||||||
|
|
||||||
if need_keepalive_handler:
|
|
||||||
- handlers.append( keepalive_handler )
|
|
||||||
+ handlers.extend( keepalive_handlers )
|
|
||||||
if need_range_handler:
|
|
||||||
handlers.extend( range_handlers )
|
|
||||||
handlers.append( auth_handler )
|
|
||||||
Index: urlgrabber/keepalive.py
|
|
||||||
===================================================================
|
|
||||||
RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/keepalive.py,v
|
|
||||||
retrieving revision 1.14
|
|
||||||
diff -u -r1.14 keepalive.py
|
|
||||||
--- urlgrabber/keepalive.py 4 Apr 2006 21:00:32 -0000 1.14
|
|
||||||
+++ urlgrabber/keepalive.py 13 Jul 2006 19:48:26 -0000
|
|
||||||
@@ -172,7 +172,7 @@
|
|
||||||
else:
|
|
||||||
return dict(self._hostmap)
|
|
||||||
|
|
||||||
-class HTTPHandler(urllib2.HTTPHandler):
|
|
||||||
+class KeepAliveHandler:
|
|
||||||
def __init__(self):
|
|
||||||
self._cm = ConnectionManager()
|
|
||||||
|
|
||||||
@@ -207,9 +207,6 @@
|
|
||||||
self._cm.remove(connection)
|
|
||||||
|
|
||||||
#### Transaction Execution
|
|
||||||
- def http_open(self, req):
|
|
||||||
- return self.do_open(HTTPConnection, req)
|
|
||||||
-
|
|
||||||
def do_open(self, http_class, req):
|
|
||||||
host = req.get_host()
|
|
||||||
if not host:
|
|
||||||
@@ -324,6 +321,20 @@
|
|
||||||
if req.has_data():
|
|
||||||
h.send(data)
|
|
||||||
|
|
||||||
+class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
|
|
||||||
+ def __init__(self):
|
|
||||||
+ KeepAliveHandler.__init__(self)
|
|
||||||
+
|
|
||||||
+ def http_open(self, req):
|
|
||||||
+ return self.do_open(HTTPConnection, req)
|
|
||||||
+
|
|
||||||
+class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
|
|
||||||
+ def __init__(self):
|
|
||||||
+ KeepAliveHandler.__init__(self)
|
|
||||||
+
|
|
||||||
+ def https_open(self, req):
|
|
||||||
+ return self.do_open(HTTPSConnection, req)
|
|
||||||
+
|
|
||||||
class HTTPResponse(httplib.HTTPResponse):
|
|
||||||
# we need to subclass HTTPResponse in order to
|
|
||||||
# 1) add readline() and readlines() methods
|
|
||||||
@@ -425,6 +436,9 @@
|
|
||||||
class HTTPConnection(httplib.HTTPConnection):
|
|
||||||
# use the modified response class
|
|
||||||
response_class = HTTPResponse
|
|
||||||
+
|
|
||||||
+class HTTPSConnection(httplib.HTTPSConnection):
|
|
||||||
+ response_class = HTTPResponse
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
##### TEST FUNCTIONS
|
|
Loading…
Reference in new issue