int/float multiplication CRAP!

16 years ago · 69dd5be1bf
parent 21a8790d62
commit 69dd5be1bf
2 changed files with 31 additions and 376 deletions
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,8 +3,9 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 1%{?dist}
+Release: 2%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
+Patch1: urlgrabber-HEAD.patch

 License: LGPLv2+
 Group: Development/Libraries
@@ -22,7 +23,7 @@ authentication, proxies and more.

 %prep
 %setup -q -n urlgrabber-%{version}
-
+%patch1 -p1

 %build
 python setup.py build
@@ -42,6 +43,9 @@ rm -rf $RPM_BUILD_ROOT
 %{_bindir}/urlgrabber

 %changelog
+* Fri Sep 25 2009 Seth Vidal <skvidal at fedoraproject.org> - 3.9.1-2
+- apply urlgrabber-HEAD.patch: fix int/float multiplication in the max-size check
+
 * Fri Sep 25 2009 Seth Vidal <skvidal at fedoraproject.org> - 3.9.1-1
 - 3.9.1

--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -1,385 +1,36 @@
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index cf51dff..979b4c1 100644
+index e090e90..c4916d5 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
-@@ -402,11 +402,11 @@ import urllib
- import urllib2
- import mimetools
- import thread
-+import types
- from stat import *  # S_* and ST_*
- import pycurl
- from ftplib import parse150
- from StringIO import StringIO
-from tempfile import mkstemp
- 
- ########################################################################
- #                     MODULE INITIALIZATION
-@@ -467,6 +467,13 @@ except AttributeError:
-     TimeoutError = None
-     have_socket_timeout = False
- 
-+try:
-+    import signal
-+    from signal import SIGPIPE, SIG_IGN
-+    signal.signal(signal.SIGPIPE, signal.SIG_IGN)
-+except ImportError:
-+    pass
-+
- ########################################################################
- # functions for debugging output.  These functions are here because they
- # are also part of the module initialization.
-@@ -859,8 +866,15 @@ class URLGrabberOptions:
-         self.data = None
-         self.urlparser = URLParser()
-         self.quote = None
-        self.ssl_ca_cert = None
-        self.ssl_context = None
-+        self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
-+        self.ssl_context = None # no-op in pycurl
-+        self.ssl_verify_peer = True # check peer's cert for authenticityb
-+        self.ssl_verify_host = True # make sure who they are and who the cert is for matches
-+        self.ssl_key = None # client key
-+        self.ssl_key_type = 'PEM' #(or DER)
-+        self.ssl_cert = None # client cert
-+        self.ssl_cert_type = 'PEM' # (or DER)
-+        self.ssl_key_pass = None # password to access the key
- 
-     def __repr__(self):
-         return self.format()
-@@ -1219,7 +1233,7 @@ class URLGrabberFileObject:
-         self.append = 0
-         reget_length = 0
-         rt = None
-        if have_range and self.opts.reget and type(self.filename) == type(''):
-+        if have_range and self.opts.reget and type(self.filename) in types.StringTypes:
-             # we have reget turned on and we're dumping to a file
-             try:
-                 s = os.stat(self.filename)
-@@ -1450,9 +1464,11 @@ class PyCurlFileObject():
-         self.scheme = urlparse.urlsplit(self.url)[0]
-         self.filename = filename
-         self.append = False
-+        self.reget_time = None
-         self.opts = opts
-+        if self.opts.reget == 'check_timestamp':
-+            raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this."
-         self._complete = False
-        self.reget_time = None
-         self._rbuf = ''
-         self._rbufsize = 1024*8
-         self._ttime = time.time()
-@@ -1474,39 +1490,45 @@ class PyCurlFileObject():
-         raise AttributeError, name
- 
-     def _retrieve(self, buf):
-        if not self._prog_running:
-            if self.opts.progress_obj:
-                size  = self.size + self._reget_length
-                self.opts.progress_obj.start(self._prog_reportname, 
-                                             urllib.unquote(self.url), 
-                                             self._prog_basename, 
-                                             size=size,
-                                             text=self.opts.text)
-                self._prog_running = True
-                self.opts.progress_obj.update(self._amount_read)
-
-        self._amount_read += len(buf)
-        self.fo.write(buf)
-        return len(buf)
-    
-+        try:
-+            if not self._prog_running:
-+                if self.opts.progress_obj:
-+                    size  = self.size + self._reget_length
-+                    self.opts.progress_obj.start(self._prog_reportname, 
-+                                                 urllib.unquote(self.url), 
-+                                                 self._prog_basename, 
-+                                                 size=size,
-+                                                 text=self.opts.text)
-+                    self._prog_running = True
-+                    self.opts.progress_obj.update(self._amount_read)
-+
-+            self._amount_read += len(buf)
-+            self.fo.write(buf)
-+            return len(buf)
-+        except KeyboardInterrupt:
-+            return -1
-+            
-     def _hdr_retrieve(self, buf):
-        self._hdr_dump += buf
-        # we have to get the size before we do the progress obj start
-        # but we can't do that w/o making it do 2 connects, which sucks
-        # so we cheat and stuff it in here in the hdr_retrieve
-        if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1:
-            length = buf.split(':')[1]
-            self.size = int(length)
-        elif self.scheme in ['ftp']:
-            s = None
-            if buf.startswith('213 '):
-                s = buf[3:].strip()
-            elif buf.startswith('150 '):
-                s = parse150(buf)
-            if s:
-                self.size = s
-        
-        return len(buf)
-+        try:
-+            self._hdr_dump += buf
-+            # we have to get the size before we do the progress obj start
-+            # but we can't do that w/o making it do 2 connects, which sucks
-+            # so we cheat and stuff it in here in the hdr_retrieve
-+            if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1:
-+                length = buf.split(':')[1]
-+                self.size = int(length)
-+            elif self.scheme in ['ftp']:
-+                s = None
-+                if buf.startswith('213 '):
-+                    s = buf[3:].strip()
-+                elif buf.startswith('150 '):
-+                    s = parse150(buf)
-+                if s:
-+                    self.size = s
-+            
-+            return len(buf)
-+        except KeyboardInterrupt:
-+            return pycurl.READFUNC_ABORT
- 
-     def _return_hdr_obj(self):
-         if self._parsed_hdr:
-@@ -1528,11 +1550,13 @@ class PyCurlFileObject():
- 
- 
-         # defaults we're always going to set
-        self.curl_obj.setopt(pycurl.NOPROGRESS, 0)
-+        self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-+        self.curl_obj.setopt(pycurl.NOSIGNAL, True)
-         self.curl_obj.setopt(pycurl.WRITEFUNCTION, self._retrieve)
-         self.curl_obj.setopt(pycurl.HEADERFUNCTION, self._hdr_retrieve)
-         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
-        self.curl_obj.setopt(pycurl.FAILONERROR, 1)
-+        self.curl_obj.setopt(pycurl.FAILONERROR, True)
-+        self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
-         
-         if DEBUG:
-             self.curl_obj.setopt(pycurl.VERBOSE, True)
-@@ -1540,19 +1564,32 @@ class PyCurlFileObject():
-             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
+@@ -1052,7 +1052,7 @@ class PyCurlFileObject():
+         self._reget_length = 0
+         self._prog_running = False
+         self._error = (None, None)
+-        self.size = None
+        self.size = 0
+         self._do_open()
         
-         # maybe to be options later
-        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, 1)
-+        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-         self.curl_obj.setopt(pycurl.MAXREDIRS, 5)
-        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, 30)
         
-         # timeouts
-         timeout = 300
-         if opts.timeout:
-             timeout = int(opts.timeout)
-        self.curl_obj.setopt(pycurl.TIMEOUT, timeout)
-+            self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
-+
-         # ssl options
-         if self.scheme == 'https':
-             if opts.ssl_ca_cert: # this may do ZERO with nss  according to curl docs
-                 self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
-+                self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
-+            self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
-+            self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host)
-+            if opts.ssl_key:
-+                self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key)
-+            if opts.ssl_key_type:
-+                self.curl_obj.setopt(pycurl.SSLKEYTYPE, opts.ssl_key_type)
-+            if opts.ssl_cert:
-+                self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert)
-+            if opts.ssl_cert_type:                
-+                self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
-+            if opts.ssl_key_pass:
-+                self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass)
- 
-         #headers:
-         if opts.http_headers and self.scheme in ('http', 'https'):
-@@ -1578,19 +1615,21 @@ class PyCurlFileObject():
-                     if scheme not in ('ftp'):
-                         continue
-                     else:
-+                        if proxy == '_none_': proxy = ""
-                         self.curl_obj.setopt(pycurl.PROXY, proxy)
-                 elif self.scheme in ('http', 'https'):
-                     if scheme not in ('http', 'https'):
-                         continue
-                     else:
-+                        if proxy == '_none_': proxy = ""
-                         self.curl_obj.setopt(pycurl.PROXY, proxy)
-        
-        # username/password/auth settings
-+            
-+        # FIXME username/password/auth settings
- 
-         #posts - simple - expects the fields as they are
-         if opts.data:
-             self.curl_obj.setopt(pycurl.POST, True)
-            self.curl_obj.setopt(pycurl.POSTFIELDS, opts.data)
-+            self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
-             
-         # our url
-         self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1607,18 +1646,62 @@ class PyCurlFileObject():
-             # to other URLGrabErrors from 
-             # http://curl.haxx.se/libcurl/c/libcurl-errors.html
-             # this covers e.args[0] == 22 pretty well - which will be common
-            if str(e.args[1]) == '': # fake it until you make it
-+            code = self.http_code                                
-+            if e.args[0] == 23 and code >= 200 and code < 299:
-+                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
-+                err.url = self.url
-+                # this is probably wrong but ultimately this is what happens
-+                # we have a legit http code and a pycurl 'writer failed' code
-+                # which almost always means something aborted it from outside
-+                # since we cannot know what it is -I'm banking on it being
-+                # a ctrl-c. XXXX - if there's a way of going back two raises to 
-+                # figure out what aborted the pycurl process FIXME
-+                raise KeyboardInterrupt
-+            
-+            elif e.args[0] == 28:
-+                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
-+                err.url = self.url
-+                raise err
-+            elif e.args[0] == 35:
-+                msg = _("problem making ssl connection")
-+                err = URLGrabError(14, msg)
-+                err.url = self.url
-+                raise err
-+
-+            if e.args[0] == 42:
-+                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
-+                err.url = self.url
-+                # this is probably wrong but ultimately this is what happens
-+                # we have a legit http code and a pycurl 'writer failed' code
-+                # which almost always means something aborted it from outside
-+                # since we cannot know what it is -I'm banking on it being
-+                # a ctrl-c. XXXX - if there's a way of going back two raises to 
-+                # figure out what aborted the pycurl process FIXME
-+                raise KeyboardInterrupt
-+                
-+            elif e.args[0] == 58:
-+                msg = _("problem with the local client certificate")
-+                err = URLGrabError(14, msg)
-+                err.url = self.url
-+                raise err
-+
-+            elif e.args[0] == 60:
-+                msg = _("client cert cannot be verified or client cert incorrect")
-+                err = URLGrabError(14, msg)
-+                err.url = self.url
-+                raise err
-+            
-+            elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
-                 msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
-             else:
-                msg = str(e.args[1])
-+                msg = 'PYCURL ERROR %s - "%s"' % (e.args[0], str(e.args[1]))
-+                code = e.args[0]
-             err = URLGrabError(14, msg)
-            err.code = self.http_code
-+            err.code = code
+@@ -1299,6 +1299,12 @@ class PyCurlFileObject():
+             err.code = code
             err.exception = e
             raise err
-            
-+
+        else:
+            if self._error[1]:
+                msg = self._error[1]
+                err = URLGrabError(14, msg)
+                err.url = self.url
+                raise err
+ 
     def _do_open(self):
-        self.append = False
-        self.reget_time = None
         self.curl_obj = _curl_cache
-         self.curl_obj.reset() # reset all old settings away, just in case
-         # setup any ranges
-@@ -1630,11 +1713,9 @@ class PyCurlFileObject():
-         pass
-         
-     def _build_range(self):
-        self.reget_time = None
-        self.append = False
-         reget_length = 0
-         rt = None
-        if self.opts.reget and type(self.filename) == type(''):
-+        if self.opts.reget and type(self.filename) in types.StringTypes:
-             # we have reget turned on and we're dumping to a file
-             try:
-                 s = os.stat(self.filename)
-@@ -1726,10 +1807,10 @@ class PyCurlFileObject():
-         if self._complete:
-             return
- 
-        if self.filename:
-+        if self.filename is not None:
-             self._prog_reportname = str(self.filename)
-             self._prog_basename = os.path.basename(self.filename)
-
-+            
-             if self.append: mode = 'ab'
-             else: mode = 'wb'
- 
-@@ -1746,19 +1827,23 @@ class PyCurlFileObject():
-         else:
-             self._prog_reportname = 'MEMORY'
-             self._prog_basename = 'MEMORY'
-            fh, self._temp_name = mkstemp()
+@@ -1536,7 +1542,8 @@ class PyCurlFileObject():
+         if self.opts.size: # if we set an opts size use that, no matter what
+             max_size = self.opts.size
+         if not max_size: return False # if we have None for all of the Max then this is dumb
+-        if cur > max_size + max_size*.10:
 +
-             
-            self.fo = open(self._temp_name, 'wb')
-+            self.fo = StringIO()
-+            # if this is to be a tempfile instead....
-+            # it just makes crap in the tempdir
-+            #fh, self._temp_name = mkstemp()
-+            #self.fo = open(self._temp_name, 'wb')
- 
-             
-         self._do_perform()
-         
+        if cur > int(float(max_size) * 1.10):
 
-        # close it up
-        self.fo.flush()
-        self.fo.close()
- 
-         if self.filename:            
-+            # close it up
-+            self.fo.flush()
-+            self.fo.close()
-             # set the time
-             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
-             if mod_time != -1:
-@@ -1766,7 +1851,8 @@ class PyCurlFileObject():
-             # re open it
-             self.fo = open(self.filename, 'r')
-         else:
-            self.fo = open(self._temp_name, 'r')
-+            #self.fo = open(self._temp_name, 'r')
-+            self.fo.seek(0)
- 
-         self._complete = True
-     
-@@ -1834,10 +1920,20 @@ class PyCurlFileObject():
-         return
- 
-     def _progress_update(self, download_total, downloaded, upload_total, uploaded):
-            if self._prog_running:
-                downloaded += self._reget_length
-                self.opts.progress_obj.update(downloaded)
-
-+            try:
-+                if self._prog_running:
-+                    downloaded += self._reget_length
-+                    self.opts.progress_obj.update(downloaded)
-+            except KeyboardInterrupt:
-+                return -1
-+
-+    def _to_utf8(self, obj, errors='replace'):
-+        '''convert 'unicode' to an encoded utf-8 byte string '''
-+        # stolen from yum.i18n
-+        if isinstance(obj, unicode):
-+            obj = obj.encode('utf-8', errors)
-+        return obj
-+        
-     def read(self, amt=None):
-         self._fill_buffer(amt)
-         if amt is None:
+             msg = _("Downloaded more than max size for %s: %s > %s") \
+                         % (self.url, cur, max_size)