python-urlgrabber/urlgrabber-HEAD.patch

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index cf51dff..3758799 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -402,6 +402,7 @@ import urllib
 import urllib2
 import mimetools
 import thread
+import types
 from stat import *  # S_* and ST_*
 import pycurl
 from ftplib import parse150
@@ -1219,7 +1220,7 @@ class URLGrabberFileObject:
         self.append = 0
         reget_length = 0
         rt = None
-        if have_range and self.opts.reget and type(self.filename) == type(''):
+        if have_range and self.opts.reget and type(self.filename) in types.StringTypes:
             # we have reget turned on and we're dumping to a file
             try:
                 s = os.stat(self.filename)
@@ -1450,6 +1451,7 @@ class PyCurlFileObject():
         self.scheme = urlparse.urlsplit(self.url)[0]
         self.filename = filename
         self.append = False
+        self.reget_time = None
         self.opts = opts
         self._complete = False
         self.reget_time = None
@@ -1528,11 +1530,12 @@ class PyCurlFileObject():
 
 
         # defaults we're always going to set
-        self.curl_obj.setopt(pycurl.NOPROGRESS, 0)
+        self.curl_obj.setopt(pycurl.NOPROGRESS, False)
         self.curl_obj.setopt(pycurl.WRITEFUNCTION, self._retrieve)
         self.curl_obj.setopt(pycurl.HEADERFUNCTION, self._hdr_retrieve)
         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
-        self.curl_obj.setopt(pycurl.FAILONERROR, 1)
+        self.curl_obj.setopt(pycurl.FAILONERROR, True)
+        self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
         
         if DEBUG:
             self.curl_obj.setopt(pycurl.VERBOSE, True)
@@ -1540,15 +1543,15 @@ class PyCurlFileObject():
             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
         
         # maybe to be options later
-        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, 1)
+        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
         self.curl_obj.setopt(pycurl.MAXREDIRS, 5)
-        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, 30)
         
         # timeouts
         timeout = 300
         if opts.timeout:
             timeout = int(opts.timeout)
-        self.curl_obj.setopt(pycurl.TIMEOUT, timeout)
+            self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+
         # ssl options
         if self.scheme == 'https':
             if opts.ssl_ca_cert: # this may do ZERO with nss  according to curl docs
@@ -1607,18 +1610,21 @@ class PyCurlFileObject():
             # to other URLGrabErrors from 
             # http://curl.haxx.se/libcurl/c/libcurl-errors.html
             # this covers e.args[0] == 22 pretty well - which will be common
+            if e.args[0] == 28:
+                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
+                err.url = self.url
+                raise err
+            code = self.http_code                
             if str(e.args[1]) == '': # fake it until you make it
                 msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
             else:
                 msg = str(e.args[1])
             err = URLGrabError(14, msg)
-            err.code = self.http_code
+            err.code = code
             err.exception = e
             raise err
             
     def _do_open(self):
-        self.append = False
-        self.reget_time = None
         self.curl_obj = _curl_cache
         self.curl_obj.reset() # reset all old settings away, just in case
         # setup any ranges
@@ -1630,11 +1636,9 @@ class PyCurlFileObject():
         pass
         
     def _build_range(self):
-        self.reget_time = None
-        self.append = False
         reget_length = 0
         rt = None
-        if self.opts.reget and type(self.filename) == type(''):
+        if self.opts.reget and type(self.filename) in types.StringTypes:
             # we have reget turned on and we're dumping to a file
             try:
                 s = os.stat(self.filename)
@@ -1729,7 +1733,7 @@ class PyCurlFileObject():
         if self.filename:
             self._prog_reportname = str(self.filename)
             self._prog_basename = os.path.basename(self.filename)
-
+            
             if self.append: mode = 'ab'
             else: mode = 'wb'
Commit message: "<sigh> actually check in the patch :(" (committed 16 years ago)
			`diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py`
			`index cf51dff..3758799 100644`
			`--- a/urlgrabber/grabber.py`
			`+++ b/urlgrabber/grabber.py`
			`@@ -402,6 +402,7 @@ import urllib`
			`import urllib2`
			`import mimetools`
			`import thread`
			`+import types`
			`from stat import * # S_* and ST_*`
			`import pycurl`
			`from ftplib import parse150`
			`@@ -1219,7 +1220,7 @@ class URLGrabberFileObject:`
			`self.append = 0`
			`reget_length = 0`
			`rt = None`
			`- if have_range and self.opts.reget and type(self.filename) == type(''):`
			`+ if have_range and self.opts.reget and type(self.filename) in types.StringTypes:`
			`# we have reget turned on and we're dumping to a file`
			`try:`
			`s = os.stat(self.filename)`
			`@@ -1450,6 +1451,7 @@ class PyCurlFileObject():`
			`self.scheme = urlparse.urlsplit(self.url)[0]`
			`self.filename = filename`
			`self.append = False`
			`+ self.reget_time = None`
			`self.opts = opts`
			`self._complete = False`
			`self.reget_time = None`
			`@@ -1528,11 +1530,12 @@ class PyCurlFileObject():`


			`# defaults we're always going to set`
			`- self.curl_obj.setopt(pycurl.NOPROGRESS, 0)`
			`+ self.curl_obj.setopt(pycurl.NOPROGRESS, False)`
			`self.curl_obj.setopt(pycurl.WRITEFUNCTION, self._retrieve)`
			`self.curl_obj.setopt(pycurl.HEADERFUNCTION, self._hdr_retrieve)`
			`self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)`
			`- self.curl_obj.setopt(pycurl.FAILONERROR, 1)`
			`+ self.curl_obj.setopt(pycurl.FAILONERROR, True)`
			`+ self.curl_obj.setopt(pycurl.OPT_FILETIME, True)`

			`if DEBUG:`
			`self.curl_obj.setopt(pycurl.VERBOSE, True)`
			`@@ -1540,15 +1543,15 @@ class PyCurlFileObject():`
			`self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)`

			`# maybe to be options later`
			`- self.curl_obj.setopt(pycurl.FOLLOWLOCATION, 1)`
			`+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)`
			`self.curl_obj.setopt(pycurl.MAXREDIRS, 5)`
			`- self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, 30)`

			`# timeouts`
			`timeout = 300`
			`if opts.timeout:`
			`timeout = int(opts.timeout)`
			`- self.curl_obj.setopt(pycurl.TIMEOUT, timeout)`
			`+ self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)`
			`+`
			`# ssl options`
			`if self.scheme == 'https':`
			`if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs`
			`@@ -1607,18 +1610,21 @@ class PyCurlFileObject():`
			`# to other URLGrabErrors from`
			`# http://curl.haxx.se/libcurl/c/libcurl-errors.html`
			`# this covers e.args[0] == 22 pretty well - which will be common`
			`+ if e.args[0] == 28:`
			`+ err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))`
			`+ err.url = self.url`
			`+ raise err`
			`+ code = self.http_code`
			`if str(e.args[1]) == '': # fake it until you make it`
			`msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)`
			`else:`
			`msg = str(e.args[1])`
			`err = URLGrabError(14, msg)`
			`- err.code = self.http_code`
			`+ err.code = code`
			`err.exception = e`
			`raise err`

			`def _do_open(self):`
			`- self.append = False`
			`- self.reget_time = None`
			`self.curl_obj = _curl_cache`
			`self.curl_obj.reset() # reset all old settings away, just in case`
			`# setup any ranges`
			`@@ -1630,11 +1636,9 @@ class PyCurlFileObject():`
			`pass`

			`def _build_range(self):`
			`- self.reget_time = None`
			`- self.append = False`
			`reget_length = 0`
			`rt = None`
			`- if self.opts.reget and type(self.filename) == type(''):`
			`+ if self.opts.reget and type(self.filename) in types.StringTypes:`
			`# we have reget turned on and we're dumping to a file`
			`try:`
			`s = os.stat(self.filename)`
			`@@ -1729,7 +1733,7 @@ class PyCurlFileObject():`
			`if self.filename:`
			`self._prog_reportname = str(self.filename)`
			`self._prog_basename = os.path.basename(self.filename)`
			`-`
			`+`
			`if self.append: mode = 'ab'`
			`else: mode = 'wb'`