You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
158 lines
5.6 KiB
158 lines
5.6 KiB
Index: urlgrabber/byterange.py
|
|
===================================================================
|
|
RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/byterange.py,v
|
|
retrieving revision 1.11
|
|
diff -u -r1.11 byterange.py
|
|
--- urlgrabber/byterange.py 22 Oct 2005 21:57:28 -0000 1.11
|
|
+++ urlgrabber/byterange.py 13 Jul 2006 19:48:26 -0000
|
|
@@ -71,6 +71,15 @@
|
|
# HTTP's Range Not Satisfiable error
|
|
raise RangeError('Requested Range Not Satisfiable')
|
|
|
|
+class HTTPSRangeHandler(HTTPRangeHandler):
|
|
+    """ Range Header support for HTTPS; each https_error_NNN hook delegates to the matching http_error_NNN method. """
|
|
+
|
|
+    def https_error_206(self, req, fp, code, msg, hdrs):  # 206 on HTTPS: same handling as HTTP
|
|
+        return self.http_error_206(req, fp, code, msg, hdrs)
|
|
+
|
|
+    def https_error_416(self, req, fp, code, msg, hdrs):  # 416 on HTTPS: same handling as HTTP
|
|
+        return self.http_error_416(req, fp, code, msg, hdrs)  # must be http_, not https_: calling https_error_416 here would recurse without end
|
|
+
|
|
class RangeableFileObject:
|
|
"""File object wrapper to enable raw range handling.
|
|
This was implemented primarilary for handling range
|
|
Index: urlgrabber/grabber.py
|
|
===================================================================
|
|
RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/grabber.py,v
|
|
retrieving revision 1.46
|
|
diff -u -r1.46 grabber.py
|
|
--- urlgrabber/grabber.py 22 Mar 2006 20:09:33 -0000 1.46
|
|
+++ urlgrabber/grabber.py 13 Jul 2006 19:48:26 -0000
|
|
@@ -402,24 +402,25 @@
|
|
# This is a convenient way to make keepalive optional.
|
|
# Just rename the module so it can't be imported.
|
|
import keepalive
|
|
- from keepalive import HTTPHandler
|
|
+ from keepalive import HTTPHandler, HTTPSHandler
|
|
except ImportError, msg:
|
|
- keepalive_handler = None
|
|
+ keepalive_handlers = ()
|
|
else:
|
|
- keepalive_handler = HTTPHandler()
|
|
+ keepalive_handlers = (HTTPHandler(), HTTPSHandler())
|
|
|
|
try:
|
|
# add in range support conditionally too
|
|
import byterange
|
|
- from byterange import HTTPRangeHandler, FileRangeHandler, \
|
|
- FTPRangeHandler, range_tuple_normalize, range_tuple_to_header, \
|
|
- RangeError
|
|
+ from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
|
|
+ FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
|
|
+ range_tuple_to_header, RangeError
|
|
except ImportError, msg:
|
|
range_handlers = ()
|
|
RangeError = None
|
|
have_range = 0
|
|
else:
|
|
- range_handlers = (HTTPRangeHandler(), FileRangeHandler(), FTPRangeHandler())
|
|
+ range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
|
|
+ FileRangeHandler(), FTPRangeHandler())
|
|
have_range = 1
|
|
|
|
|
|
@@ -454,7 +455,7 @@
|
|
|
|
global DEBUG
|
|
DEBUG = DBOBJ
|
|
- if keepalive_handler and keepalive.DEBUG is None:
|
|
+ if keepalive_handlers and keepalive.DEBUG is None:
|
|
keepalive.DEBUG = DBOBJ
|
|
if have_range and byterange.DEBUG is None:
|
|
byterange.DEBUG = DBOBJ
|
|
@@ -582,7 +583,8 @@
|
|
|
|
def close_all():
|
|
"""close any open keepalive connections"""
|
|
- if keepalive_handler: keepalive_handler.close_all()
|
|
+ for handler in keepalive_handlers:
|
|
+ handler.close_all()
|
|
|
|
def urlgrab(url, filename=None, **kwargs):
|
|
"""grab the file at <url> and make a local copy at <filename>
|
|
@@ -1012,7 +1014,7 @@
|
|
return self.opts.opener
|
|
elif self._opener is None:
|
|
handlers = []
|
|
- need_keepalive_handler = (keepalive_handler and self.opts.keepalive)
|
|
+ need_keepalive_handler = (keepalive_handlers and self.opts.keepalive)
|
|
need_range_handler = (range_handlers and \
|
|
(self.opts.range or self.opts.reget))
|
|
# if you specify a ProxyHandler when creating the opener
|
|
@@ -1043,7 +1045,7 @@
|
|
# -------------------------------------------------------
|
|
|
|
if need_keepalive_handler:
|
|
- handlers.append( keepalive_handler )
|
|
+ handlers.extend( keepalive_handlers )
|
|
if need_range_handler:
|
|
handlers.extend( range_handlers )
|
|
handlers.append( auth_handler )
|
|
Index: urlgrabber/keepalive.py
|
|
===================================================================
|
|
RCS file: /cvsroot/urlgrabber/cvs-root/urlgrabber/urlgrabber/keepalive.py,v
|
|
retrieving revision 1.14
|
|
diff -u -r1.14 keepalive.py
|
|
--- urlgrabber/keepalive.py 4 Apr 2006 21:00:32 -0000 1.14
|
|
+++ urlgrabber/keepalive.py 13 Jul 2006 19:48:26 -0000
|
|
@@ -172,7 +172,7 @@
|
|
else:
|
|
return dict(self._hostmap)
|
|
|
|
-class HTTPHandler(urllib2.HTTPHandler):
|
|
+class KeepAliveHandler:
|
|
def __init__(self):
|
|
self._cm = ConnectionManager()
|
|
|
|
@@ -207,9 +207,6 @@
|
|
self._cm.remove(connection)
|
|
|
|
#### Transaction Execution
|
|
- def http_open(self, req):
|
|
- return self.do_open(HTTPConnection, req)
|
|
-
|
|
def do_open(self, http_class, req):
|
|
host = req.get_host()
|
|
if not host:
|
|
@@ -324,6 +321,20 @@
|
|
if req.has_data():
|
|
h.send(data)
|
|
|
|
+class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
|
|
+ def __init__(self):
|
|
+ KeepAliveHandler.__init__(self)
|
|
+
|
|
+ def http_open(self, req):
|
|
+ return self.do_open(HTTPConnection, req)
|
|
+
|
|
+class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
|
|
+ def __init__(self):
|
|
+ KeepAliveHandler.__init__(self)
|
|
+
|
|
+ def https_open(self, req):
|
|
+ return self.do_open(HTTPSConnection, req)
|
|
+
|
|
class HTTPResponse(httplib.HTTPResponse):
|
|
# we need to subclass HTTPResponse in order to
|
|
# 1) add readline() and readlines() methods
|
|
@@ -425,6 +436,9 @@
|
|
class HTTPConnection(httplib.HTTPConnection):
|
|
# use the modified response class
|
|
response_class = HTTPResponse
|
|
+
|
|
+class HTTPSConnection(httplib.HTTPSConnection):
|
|
+ response_class = HTTPResponse
|
|
|
|
#########################################################################
|
|
##### TEST FUNCTIONS
|