update to latest urlgrabber head

15 years ago · 36ff3aaff0
parent e5b623e840
commit 36ff3aaff0
2 changed files with 188 additions and 18 deletions
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 7%{?dist}
+Release: 8%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch

@ -43,6 +43,9 @@ rm -rf $RPM_BUILD_ROOT
 %{_bindir}/urlgrabber

 %changelog
+* Mon Aug 30 2010 Seth Vidal <skvidal at fedoraproject.org> - 3.9.1-8
+- update to latest head patches
+
 * Thu Jul 22 2010 David Malcolm <dmalcolm@redhat.com> - 3.9.1-7
 - Rebuilt for https://fedoraproject.org/wiki/Features/Python_2.7/MassRebuild

--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@ -11,6 +11,66 @@ index 0000000..1ffe416
 +*.kdev*
 +*.kateproject
 +ipython.log*
+diff --git a/scripts/urlgrabber b/scripts/urlgrabber
+index 518e512..09cd896 100644
+--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
+@@ -115,6 +115,7 @@ options:
+                     including quotes in the case of strings.
+                     e.g.  --user_agent='"foobar/2.0"'
+ 
+  --output FILE
+   -o FILE           write output to FILE, otherwise the basename of the
+                     url will be used
+   -O                print the names of saved files to STDOUT
+@@ -170,12 +171,17 @@ class client_options:
+         return ug_options, ug_defaults
+ 
+     def process_command_line(self):
+-        short_options = 'vd:hoOpD'
+        short_options = 'vd:ho:OpD'
+         long_options = ['profile', 'repeat=', 'verbose=',
+-                        'debug=', 'help', 'progress']
+                        'debug=', 'help', 'progress', 'output=']
+         ug_long = [ o + '=' for o in self.ug_options ]
+-        optlist, args = getopt.getopt(sys.argv[1:], short_options,
+-                                      long_options + ug_long)
+        try:
+            optlist, args = getopt.getopt(sys.argv[1:], short_options,
+                                          long_options + ug_long)
+        except getopt.GetoptError, e:
+            print >>sys.stderr, "Error:", e
+            self.help([], ret=1)
+
+         self.verbose = 0
+         self.debug = None
+         self.outputfile = None
+@@ -193,6 +199,7 @@ class client_options:
+             if o == '--verbose': self.verbose = v
+             if o == '-v':        self.verbose += 1
+             if o == '-o':        self.outputfile = v
+            if o == '--output':  self.outputfile = v
+             if o == '-p' or o == '--progress': self.progress = 1
+             if o == '-d' or o == '--debug': self.debug = v
+             if o == '--profile': self.profile = 1
+@@ -222,7 +229,7 @@ class client_options:
+             print "ERROR: cannot use -o when grabbing multiple files"
+             sys.exit(1)
+ 
+-    def help(self, args):
+    def help(self, args, ret=0):
+         if not args:
+             print MAINHELP
+         else:
+@@ -234,7 +241,7 @@ class client_options:
+                     self.help_ug_option(a)
+                 else:
+                     print 'ERROR: no help on command "%s"' % a
+-        sys.exit(0)
+        sys.exit(ret)
+ 
+     def help_doc(self):
+         print __doc__
 diff --git a/test/base_test_code.py b/test/base_test_code.py
 index 50c6348..5fb43f9 100644
 --- a/test/base_test_code.py
@ -24,7 +84,7 @@ index 50c6348..5fb43f9 100644
 
 # set to a proftp server only. we're working around a couple of
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..4797436 100644
+index e090e90..0c78857 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
@ -49,7 +109,19 @@ index e090e90..4797436 100644
 
   bandwidth = 0
 
-@@ -439,6 +439,12 @@ try:
+@@ -248,6 +248,11 @@ GENERAL ARGUMENTS (kwargs)
+ 
+     Maximum size (in bytes) of the headers.
+     
+  ip_resolve = 'whatever'
+
+    Which kind of name-to-IP resolution to use: 'whatever' (both IPv4 and
+    IPv6, which is also what happens when unset), 'ipv4' or 'ipv6'.
+
+ 
+ RETRY RELATED ARGUMENTS
+ 
+@@ -439,6 +444,12 @@ try:
 except:
     __version__ = '???'
 
@ -62,7 +134,15 @@ index e090e90..4797436 100644
 ########################################################################
 # functions for debugging output.  These functions are here because they
 # are also part of the module initialization.
-@@ -808,7 +814,7 @@ class URLGrabberOptions:
+@@ -800,6 +811,7 @@ class URLGrabberOptions:
+         self.close_connection = 0
+         self.range = None
+         self.user_agent = 'urlgrabber/%s' % __version__
+        self.ip_resolve = None
+         self.keepalive = 1
+         self.proxies = None
+         self.reget = None
+@@ -808,7 +820,7 @@ class URLGrabberOptions:
         self.prefix = None
         self.opener = None
         self.cache_openers = True
@ -71,7 +151,17 @@ index e090e90..4797436 100644
         self.text = None
         self.http_headers = None
         self.ftp_headers = None
-@@ -1052,9 +1058,15 @@ class PyCurlFileObject():
+@@ -931,6 +943,9 @@ class URLGrabber:
+         (scheme, host, path, parm, query, frag) = parts
+         if filename is None:
+             filename = os.path.basename( urllib.unquote(path) )
+            if not filename:
+                # This is better than nothing.
+                filename = 'index.html'
+         if scheme == 'file' and not opts.copy_local:
+             # just return the name of the local file - don't make a 
+             # copy currently
+@@ -1052,9 +1067,15 @@ class PyCurlFileObject():
         self._reget_length = 0
         self._prog_running = False
         self._error = (None, None)
@ -88,7 +178,7 @@ index e090e90..4797436 100644
         
     def __getattr__(self, name):
         """This effectively allows us to wrap at the instance level.
-@@ -1085,9 +1097,14 @@ class PyCurlFileObject():
+@@ -1085,9 +1106,14 @@ class PyCurlFileObject():
             return -1
             
     def _hdr_retrieve(self, buf):
@ -104,7 +194,7 @@ index e090e90..4797436 100644
         try:
             self._hdr_dump += buf
             # we have to get the size before we do the progress obj start
-@@ -1104,7 +1121,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1130,17 @@ class PyCurlFileObject():
                     s = parse150(buf)
                 if s:
                     self.size = int(s)
@ -123,7 +213,7 @@ index e090e90..4797436 100644
             return len(buf)
         except KeyboardInterrupt:
             return pycurl.READFUNC_ABORT
-@@ -1113,8 +1140,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1149,10 @@ class PyCurlFileObject():
         if self._parsed_hdr:
             return self._parsed_hdr
         statusend = self._hdr_dump.find('\n')
@ -134,7 +224,7 @@ index e090e90..4797436 100644
         self._parsed_hdr =  mimetools.Message(hdrfp)
         return self._parsed_hdr
     
-@@ -1136,6 +1165,7 @@ class PyCurlFileObject():
+@@ -1136,11 +1174,21 @@ class PyCurlFileObject():
         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
         self.curl_obj.setopt(pycurl.FAILONERROR, True)
         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@ -142,7 +232,21 @@ index e090e90..4797436 100644
         
         if DEBUG:
             self.curl_obj.setopt(pycurl.VERBOSE, True)
-@@ -1148,9 +1178,11 @@ class PyCurlFileObject():
+         if opts.user_agent:
+             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
+        if opts.ip_resolve:
+            # Default is: IPRESOLVE_WHATEVER
+            ipr = opts.ip_resolve.lower()
+            if ipr == 'whatever': # Do we need this?
+                self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
+            if ipr == 'ipv4':
+                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+            if ipr == 'ipv6':
+                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
+         
+         # maybe to be options later
+         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+@@ -1148,9 +1196,11 @@ class PyCurlFileObject():
         
         # timeouts
         timeout = 300
@ -157,7 +261,7 @@ index e090e90..4797436 100644
 
         # ssl options
         if self.scheme == 'https':
-@@ -1276,7 +1308,7 @@ class PyCurlFileObject():
+@@ -1276,7 +1326,7 @@ class PyCurlFileObject():
                 raise err
 
             elif errcode == 60:
@ -166,7 +270,7 @@ index e090e90..4797436 100644
                 err = URLGrabError(14, msg)
                 err.url = self.url
                 raise err
-@@ -1291,7 +1323,12 @@ class PyCurlFileObject():
+@@ -1291,14 +1341,70 @@ class PyCurlFileObject():
                 raise err
                     
             elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
@ -178,9 +282,55 @@ index e090e90..4797436 100644
 +                else:
 +                    msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme)
             else:
-                 msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+-                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+                pyerr2str = { 5 : _("Couldn't resolve proxy"),
+                              6 : _("Couldn't resolve host"),
+                              7 : _("Couldn't connect"),
+                              8 : _("Bad reply to FTP server"),
+                              9 : _("Access denied"),
+                             11 : _("Bad reply to FTP pass"),
+                             13 : _("Bad reply to FTP pasv"),
+                             14 : _("Bad reply to FTP 227"),
+                             15 : _("Couldn't get FTP host"),
+                             17 : _("Couldn't set FTP type"),
+                             18 : _("Partial file"),
+                             19 : _("FTP RETR command failed"),
+                             22 : _("HTTP returned error"),
+                             23 : _("Write error"),
+                             25 : _("Upload failed"),
+                             26 : _("Read error"),
+                             27 : _("Out of Memory"),
+                             28 : _("Operation timed out"),
+                             30 : _("FTP PORT command failed"),
+                             31 : _("FTP REST command failed"),
+                             33 : _("Range failed"),
+                             34 : _("HTTP POST failed"),
+                             35 : _("SSL CONNECT failed"),
+                             36 : _("Couldn't resume download"),
+                             37 : _("Couldn't read file"),
+                             42 : _("Aborted by callback"),
+                             47 : _("Too many redirects"),
+                             51 : _("Peer certificate failed verification"),
+                             53 : _("SSL engine not found"),
+                             54 : _("SSL engine set failed"),
+                             55 : _("Network error send()"),
+                             56 : _("Network error recv()"),
+                             58 : _("Local certificate failed"),
+                             59 : _("SSL set cipher failed"),
+                             60 : _("Local CA certificate failed"),
+                             61 : _("HTTP bad transfer encoding"),
+                             63 : _("Maximum file size exceeded"),
+                             64 : _("FTP SSL failed"),
+                             67 : _("Authentication failure"),
+                             70 : _("Out of disk space on server"),
+                             73 : _("Remote file exists"),
+                              }
+                errstr = str(e.args[1])
+                if not errstr:
+                    errstr = pyerr2str.get(errcode, '<Unknown>')
+                msg = 'curl#%s - "%s"' % (errcode, errstr)
                 code = errcode
-@@ -1299,6 +1336,12 @@ class PyCurlFileObject():
+             err = URLGrabError(14, msg)
             err.code = code
             err.exception = e
             raise err
@ -193,7 +343,24 @@ index e090e90..4797436 100644
 
     def _do_open(self):
         self.curl_obj = _curl_cache
-@@ -1446,9 +1489,23 @@ class PyCurlFileObject():
+@@ -1434,9 +1540,13 @@ class PyCurlFileObject():
+             #fh, self._temp_name = mkstemp()
+             #self.fo = open(self._temp_name, 'wb')
+ 
+-            
+-        self._do_perform()
+-        
+        try:            
+            self._do_perform()
+        except URLGrabError, e:
+            self.fo.flush()
+            self.fo.close()
+            raise e
+    
+ 
+ 
+         if _was_filename:
+@@ -1446,9 +1556,23 @@ class PyCurlFileObject():
             # set the time
             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
             if mod_time != -1:
@ -203,7 +370,7 @@ index e090e90..4797436 100644
 +                except OSError, e:
 +                    err = URLGrabError(16, _(\
 +                      'error setting timestamp on file %s from %s, OSError: %s') 
-+                              % (self.filenameself.url, e))
+                              % (self.filename, self.url, e))
 +                    err.url = self.url
 +                    raise err
             # re open it
@ -219,7 +386,7 @@ index e090e90..4797436 100644
         else:
             #self.fo = open(self._temp_name, 'r')
             self.fo.seek(0)
-@@ -1532,11 +1589,14 @@ class PyCurlFileObject():
+@@ -1532,11 +1656,14 @@ class PyCurlFileObject():
     def _over_max_size(self, cur, max_size=None):
 
         if not max_size:
@ -238,7 +405,7 @@ index e090e90..4797436 100644
 
             msg = _("Downloaded more than max size for %s: %s > %s") \
                         % (self.url, cur, max_size)
-@@ -1582,9 +1642,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1709,21 @@ class PyCurlFileObject():
             self.opts.progress_obj.end(self._amount_read)
         self.fo.close()