diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index cf51dff..3758799 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -402,6 +402,7 @@ import urllib
 import urllib2
 import mimetools
 import thread
+import types
 from stat import * # S_* and ST_*
 import pycurl
 from ftplib import parse150
@@ -1219,7 +1220,7 @@ class URLGrabberFileObject:
         self.append = 0
         reget_length = 0
         rt = None
-        if have_range and self.opts.reget and type(self.filename) == type(''):
+        if have_range and self.opts.reget and type(self.filename) in types.StringTypes:
             # we have reget turned on and we're dumping to a file
             try:
                 s = os.stat(self.filename)
@@ -1450,6 +1451,7 @@ class PyCurlFileObject():
         self.scheme = urlparse.urlsplit(self.url)[0]
         self.filename = filename
         self.append = False
+        self.reget_time = None
         self.opts = opts
         self._complete = False
         self.reget_time = None
@@ -1528,11 +1530,12 @@ class PyCurlFileObject():
 
         # defaults we're always going to set
-        self.curl_obj.setopt(pycurl.NOPROGRESS, 0)
+        self.curl_obj.setopt(pycurl.NOPROGRESS, False)
         self.curl_obj.setopt(pycurl.WRITEFUNCTION, self._retrieve)
         self.curl_obj.setopt(pycurl.HEADERFUNCTION, self._hdr_retrieve)
         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
-        self.curl_obj.setopt(pycurl.FAILONERROR, 1)
+        self.curl_obj.setopt(pycurl.FAILONERROR, True)
+        self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
 
         if DEBUG:
             self.curl_obj.setopt(pycurl.VERBOSE, True)
 
@@ -1540,15 +1543,15 @@ class PyCurlFileObject():
             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
 
         # maybe to be options later
-        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, 1)
+        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
         self.curl_obj.setopt(pycurl.MAXREDIRS, 5)
-        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, 30)
 
         # timeouts
         timeout = 300
         if opts.timeout:
             timeout = int(opts.timeout)
-        self.curl_obj.setopt(pycurl.TIMEOUT, timeout)
+        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+
         # ssl options
         if self.scheme == 'https':
             if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs
@@ -1607,18 +1610,21 @@ class PyCurlFileObject():
             # to other URLGrabErrors from
             # http://curl.haxx.se/libcurl/c/libcurl-errors.html
             # this covers e.args[0] == 22 pretty well - which will be common
+            if e.args[0] == 28:
+                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
+                err.url = self.url
+                raise err
+            code = self.http_code
             if str(e.args[1]) == '': # fake it until you make it
                 msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
             else:
                 msg = str(e.args[1])
             err = URLGrabError(14, msg)
-            err.code = self.http_code
+            err.code = code
             err.exception = e
             raise err
 
     def _do_open(self):
-        self.append = False
-        self.reget_time = None
         self.curl_obj = _curl_cache
         self.curl_obj.reset() # reset all old settings away, just in case
         # setup any ranges
@@ -1630,11 +1636,9 @@ class PyCurlFileObject():
             pass
 
     def _build_range(self):
-        self.reget_time = None
-        self.append = False
         reget_length = 0
         rt = None
-        if self.opts.reget and type(self.filename) == type(''):
+        if self.opts.reget and type(self.filename) in types.StringTypes:
             # we have reget turned on and we're dumping to a file
             try:
                 s = os.stat(self.filename)
@@ -1729,7 +1733,7 @@ class PyCurlFileObject():
         if self.filename:
             self._prog_reportname = str(self.filename)
             self._prog_basename = os.path.basename(self.filename)
-            
+
             if self.append: mode = 'ab'
             else: mode = 'wb'
 
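
For context on the new `e.args[0] == 28` branch: libcurl reports a timed-out transfer as error 28 (CURLE_OPERATION_TIMEDOUT), which pycurl surfaces as a `pycurl.error` whose first argument is the errno. Below is a minimal standalone sketch of that mapping, not part of the patch; the URL and the 1-second timeout are hypothetical.

# Sketch only: shows a libcurl timeout arriving as pycurl.error with
# errno 28, the case the patch converts to URLGrabError(12).
import pycurl

c = pycurl.Curl()
c.setopt(pycurl.URL, 'http://example.com/some/large/file')  # hypothetical URL
c.setopt(pycurl.CONNECTTIMEOUT, 1)  # the option the patch now drives from opts.timeout
c.setopt(pycurl.WRITEFUNCTION, lambda buf: None)  # discard the body
try:
    c.perform()
except pycurl.error, e:
    if e.args[0] == 28:  # CURLE_OPERATION_TIMEDOUT
        print 'timeout -> would raise URLGrabError(12): %s' % e.args[1]
    else:
        raise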