Update to latest HEAD

12 years ago · b619025a75
parent bc8c777de1
commit b619025a75
2 changed files with 64 additions and 52 deletions
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 27%{?dist}
+Release: 28%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch

@ -44,6 +44,11 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down

 %changelog
+* Fri May 17 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-28
+- Update to latest HEAD.
+- Add the "minrate" option. BZ 964298
+- Work around progress "!!!" end for file:// repos.
+
 * Fri May 17 2013 Zdenek Pavlas <zpavlas@redhat.com> - 3.9.1-27
 - Update to latest HEAD.
 - add URLGrabError.code to the external downloader API
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@ -314,7 +314,7 @@ index 3e5f3b7..8eeaeda 100644
     return (fb,lb)
 
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..37d1297 100644
+index e090e90..05ea9c3 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@ -345,12 +345,19 @@ index e090e90..37d1297 100644
   text = None
   
     specifies alternative text to be passed to the progress meter
-@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs)
+@@ -68,14 +83,20 @@ GENERAL ARGUMENTS (kwargs)
     (which can be set on default_grabber.throttle) is used. See
     BANDWIDTH THROTTLING for more information.
 
 -  timeout = None
 +  timeout = 300
+
+    a positive integer expressing the number of seconds to wait before
+    timing out attempts to connect to a server. If the value is None
+    or 0, connection attempts will not time out. The timeout is passed
+    to the underlying pycurl object as its CONNECTTIMEOUT option, see
+    the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
+    http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
 
 -    a positive float expressing the number of seconds to wait for socket
 -    operations. If the value is None or 0.0, socket operations will block
@ -358,16 +365,15 @@ index e090e90..37d1297 100644
 -    method on the Socket object used for the request. See the Python
 -    documentation on settimeout for more information.
 -    http://www.python.org/doc/current/lib/socket-objects.html
-+    a positive integer expressing the number of seconds to wait before
-+    timing out attempts to connect to a server. If the value is None
-+    or 0, connection attempts will not time out. The timeout is passed
-+    to the underlying pycurl object as its CONNECTTIMEOUT option, see
-+    the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
-+    http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
+  minrate = 1000
+
+    This sets the low speed threshold in bytes per second. If the server
+    is sending data slower than this for at least `timeout' seconds, the
+    library aborts the connection.
 
   bandwidth = 0
 
-@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs)
+@@ -143,8 +164,12 @@ GENERAL ARGUMENTS (kwargs)
     note that proxy authentication information may be provided using
     normal URL constructs:
       proxies={ 'http' : 'http://user:host@foo:3128' }
@ -382,7 +388,7 @@ index e090e90..37d1297 100644
 
   prefix = None
 
-@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs)
+@@ -198,6 +223,12 @@ GENERAL ARGUMENTS (kwargs)
     control, you should probably subclass URLParser and pass it in via
     the 'urlparser' option.
 
@ -395,7 +401,7 @@ index e090e90..37d1297 100644
   ssl_ca_cert = None
 
     this option can be used if M2Crypto is available and will be
-@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs)
+@@ -211,43 +242,75 @@ GENERAL ARGUMENTS (kwargs)
     No-op when using the curl backend (default)
    
 
@ -480,7 +486,7 @@ index e090e90..37d1297 100644
 
 RETRY RELATED ARGUMENTS
 
-@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS
+@@ -328,6 +391,15 @@ RETRY RELATED ARGUMENTS
     but it cannot (without severe trickiness) prevent the exception
     from being raised.
 
@ -496,7 +502,7 @@ index e090e90..37d1297 100644
   interrupt_callback = None
 
     This callback is called if KeyboardInterrupt is received at any
-@@ -420,6 +486,7 @@ import time
+@@ -420,6 +492,7 @@ import time
 import string
 import urllib
 import urllib2
@ -504,7 +510,7 @@ index e090e90..37d1297 100644
 import mimetools
 import thread
 import types
-@@ -428,9 +495,17 @@ import pycurl
+@@ -428,9 +501,17 @@ import pycurl
 from ftplib import parse150
 from StringIO import StringIO
 from httplib import HTTPException
@ -523,7 +529,7 @@ index e090e90..37d1297 100644
 ########################################################################
 #                     MODULE INITIALIZATION
 ########################################################################
-@@ -439,6 +514,12 @@ try:
+@@ -439,6 +520,12 @@ try:
 except:
     __version__ = '???'
 
@ -536,7 +542,7 @@ index e090e90..37d1297 100644
 ########################################################################
 # functions for debugging output.  These functions are here because they
 # are also part of the module initialization.
-@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None):
+@@ -504,6 +591,7 @@ def _init_default_logger(logspec=None):
         else:  handler = logging.FileHandler(filename)
         handler.setFormatter(formatter)
         DBOBJ = logging.getLogger('urlgrabber')
@ -544,7 +550,7 @@ index e090e90..37d1297 100644
         DBOBJ.addHandler(handler)
         DBOBJ.setLevel(level)
     except (KeyError, ImportError, ValueError):
-@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None):
+@@ -512,8 +600,8 @@ def _init_default_logger(logspec=None):
 
 def _log_package_state():
     if not DEBUG: return
@ -555,7 +561,7 @@ index e090e90..37d1297 100644
         
 _init_default_logger()
 _log_package_state()
-@@ -527,6 +609,29 @@ def _(st):
+@@ -527,6 +615,29 @@ def _(st):
 #                 END MODULE INITIALIZATION
 ########################################################################
 
@ -585,7 +591,7 @@ index e090e90..37d1297 100644
 
 
 class URLGrabError(IOError):
-@@ -662,6 +767,7 @@ class URLParser:
+@@ -662,6 +773,7 @@ class URLParser:
           opts.quote = 0     --> do not quote it
           opts.quote = None  --> guess
         """
@ -593,7 +599,7 @@ index e090e90..37d1297 100644
         quote = opts.quote
         
         if opts.prefix:
-@@ -768,6 +874,41 @@ class URLGrabberOptions:
+@@ -768,6 +880,41 @@ class URLGrabberOptions:
         else: # throttle is a float
             return self.bandwidth * self.throttle
         
@ -635,7 +641,7 @@ index e090e90..37d1297 100644
     def derive(self, **kwargs):
         """Create a derived URLGrabberOptions instance.
         This method creates a new instance and overrides the
-@@ -791,30 +932,37 @@ class URLGrabberOptions:
+@@ -791,30 +938,38 @@ class URLGrabberOptions:
         provided here.
         """
         self.progress_obj = None
@ -663,6 +669,7 @@ index e090e90..37d1297 100644
         self.cache_openers = True
 -        self.timeout = None
 +        self.timeout = 300
+        self.minrate = None
         self.text = None
         self.http_headers = None
         self.ftp_headers = None
@ -674,7 +681,7 @@ index e090e90..37d1297 100644
         self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
         self.ssl_context = None # no-op in pycurl
         self.ssl_verify_peer = True # check peer's cert for authenticityb
-@@ -827,6 +975,12 @@ class URLGrabberOptions:
+@@ -827,6 +982,12 @@ class URLGrabberOptions:
         self.size = None # if we know how big the thing we're getting is going
                          # to be. this is ultimately a MAXIMUM size for the file
         self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
@ -687,7 +694,7 @@ index e090e90..37d1297 100644
         
     def __repr__(self):
         return self.format()
-@@ -846,7 +1000,18 @@ class URLGrabberOptions:
+@@ -846,7 +1007,18 @@ class URLGrabberOptions:
         s = s + indent + '}'
         return s
 
@ -707,7 +714,7 @@ index e090e90..37d1297 100644
     """Provides easy opening of URLs with a variety of options.
     
     All options are specified as kwargs. Options may be specified when
-@@ -872,7 +1037,6 @@ class URLGrabber:
+@@ -872,7 +1044,6 @@ class URLGrabber:
             # beware of infinite loops :)
             tries = tries + 1
             exception = None
@ -715,7 +722,7 @@ index e090e90..37d1297 100644
             callback  = None
             if DEBUG: DEBUG.info('attempt %i/%s: %s',
                                  tries, opts.retry, args[0])
-@@ -883,54 +1047,62 @@ class URLGrabber:
+@@ -883,54 +1054,62 @@ class URLGrabber:
             except URLGrabError, e:
                 exception = e
                 callback = opts.failure_callback
@ -785,7 +792,7 @@ index e090e90..37d1297 100644
         if scheme == 'file' and not opts.copy_local:
             # just return the name of the local file - don't make a 
             # copy currently
-@@ -950,41 +1122,51 @@ class URLGrabber:
+@@ -950,41 +1129,51 @@ class URLGrabber:
 
             elif not opts.range:
                 if not opts.checkfunc is None:
@ -852,7 +859,7 @@ index e090e90..37d1297 100644
         if limit is not None:
             limit = limit + 1
             
-@@ -1000,12 +1182,8 @@ class URLGrabber:
+@@ -1000,12 +1189,8 @@ class URLGrabber:
                 else: s = fo.read(limit)
 
                 if not opts.checkfunc is None:
@ -867,7 +874,7 @@ index e090e90..37d1297 100644
             finally:
                 fo.close()
             return s
-@@ -1020,6 +1198,7 @@ class URLGrabber:
+@@ -1020,6 +1205,7 @@ class URLGrabber:
         return s
         
     def _make_callback(self, callback_obj):
@ -875,7 +882,7 @@ index e090e90..37d1297 100644
         if callable(callback_obj):
             return callback_obj, (), {}
         else:
-@@ -1030,7 +1209,7 @@ class URLGrabber:
+@@ -1030,7 +1216,7 @@ class URLGrabber:
 default_grabber = URLGrabber()
 
 
@ -884,7 +891,7 @@ index e090e90..37d1297 100644
     def __init__(self, url, filename, opts):
         self.fo = None
         self._hdr_dump = ''
-@@ -1052,10 +1231,13 @@ class PyCurlFileObject():
+@@ -1052,10 +1238,13 @@ class PyCurlFileObject():
         self._reget_length = 0
         self._prog_running = False
         self._error = (None, None)
@ -900,7 +907,7 @@ index e090e90..37d1297 100644
     def __getattr__(self, name):
         """This effectively allows us to wrap at the instance level.
         Any attribute not found in _this_ object will be searched for
-@@ -1067,6 +1249,12 @@ class PyCurlFileObject():
+@@ -1067,6 +1256,12 @@ class PyCurlFileObject():
 
     def _retrieve(self, buf):
         try:
@ -913,7 +920,7 @@ index e090e90..37d1297 100644
             if not self._prog_running:
                 if self.opts.progress_obj:
                     size  = self.size + self._reget_length
-@@ -1079,23 +1267,40 @@ class PyCurlFileObject():
+@@ -1079,23 +1274,40 @@ class PyCurlFileObject():
                     self.opts.progress_obj.update(self._amount_read)
 
             self._amount_read += len(buf)
@ -960,7 +967,7 @@ index e090e90..37d1297 100644
             elif self.scheme in ['ftp']:
                 s = None
                 if buf.startswith('213 '):
-@@ -1104,7 +1309,18 @@ class PyCurlFileObject():
+@@ -1104,7 +1316,18 @@ class PyCurlFileObject():
                     s = parse150(buf)
                 if s:
                     self.size = int(s)
@ -980,7 +987,7 @@ index e090e90..37d1297 100644
             return len(buf)
         except KeyboardInterrupt:
             return pycurl.READFUNC_ABORT
-@@ -1113,8 +1329,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1336,10 @@ class PyCurlFileObject():
         if self._parsed_hdr:
             return self._parsed_hdr
         statusend = self._hdr_dump.find('\n')
@ -991,7 +998,7 @@ index e090e90..37d1297 100644
         self._parsed_hdr =  mimetools.Message(hdrfp)
         return self._parsed_hdr
     
-@@ -1127,6 +1345,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1352,9 @@ class PyCurlFileObject():
         if not opts:
             opts = self.opts
 
@ -1001,7 +1008,7 @@ index e090e90..37d1297 100644
 
         # defaults we're always going to set
         self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1357,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1364,21 @@ class PyCurlFileObject():
         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
         self.curl_obj.setopt(pycurl.FAILONERROR, True)
         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@ -1024,7 +1031,7 @@ index e090e90..37d1297 100644
         
         # maybe to be options later
         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1379,11 @@ class PyCurlFileObject():
+@@ -1148,9 +1386,11 @@ class PyCurlFileObject():
         
         # timeouts
         timeout = 300
@ -1034,12 +1041,12 @@ index e090e90..37d1297 100644
 +        if hasattr(opts, 'timeout'):
 +            timeout = int(opts.timeout or 0)
 +        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
-+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000)
 +        self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
 
         # ssl options
         if self.scheme == 'https':
-@@ -1158,13 +1391,16 @@ class PyCurlFileObject():
+@@ -1158,13 +1398,16 @@ class PyCurlFileObject():
                 self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
                 self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
             self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@ -1057,7 +1064,7 @@ index e090e90..37d1297 100644
             if opts.ssl_cert_type:                
                 self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
             if opts.ssl_key_pass:
-@@ -1187,28 +1423,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1430,26 @@ class PyCurlFileObject():
         if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
             self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
             
@ -1102,7 +1109,7 @@ index e090e90..37d1297 100644
             
         # our url
         self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1462,26 @@ class PyCurlFileObject():
+@@ -1228,39 +1469,26 @@ class PyCurlFileObject():
             
             code = self.http_code
             errcode = e.args[0]
@ -1148,7 +1155,7 @@ index e090e90..37d1297 100644
                 # this is probably wrong but ultimately this is what happens
                 # we have a legit http code and a pycurl 'writer failed' code
                 # which almost always means something aborted it from outside
-@@ -1269,36 +1490,70 @@ class PyCurlFileObject():
+@@ -1269,36 +1497,70 @@ class PyCurlFileObject():
                 # figure out what aborted the pycurl process FIXME
                 raise KeyboardInterrupt
                 
@ -1244,7 +1251,7 @@ index e090e90..37d1297 100644
 
     def _do_open(self):
         self.curl_obj = _curl_cache
-@@ -1333,7 +1588,11 @@ class PyCurlFileObject():
+@@ -1333,7 +1595,11 @@ class PyCurlFileObject():
                 
         if self.opts.range:
             rt = self.opts.range
@ -1257,7 +1264,7 @@ index e090e90..37d1297 100644
 
         if rt:
             header = range_tuple_to_header(rt)
-@@ -1434,21 +1693,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1700,46 @@ class PyCurlFileObject():
             #fh, self._temp_name = mkstemp()
             #self.fo = open(self._temp_name, 'wb')
 
@ -1311,7 +1318,7 @@ index e090e90..37d1297 100644
         else:
             #self.fo = open(self._temp_name, 'r')
             self.fo.seek(0)
-@@ -1526,17 +1810,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1817,20 @@ class PyCurlFileObject():
             if self._prog_running:
                 downloaded += self._reget_length
                 self.opts.progress_obj.update(downloaded)
@ -1337,7 +1344,7 @@ index e090e90..37d1297 100644
 
             msg = _("Downloaded more than max size for %s: %s > %s") \
                         % (self.url, cur, max_size)
-@@ -1544,13 +1831,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1838,6 @@ class PyCurlFileObject():
             return True
         return False
         
@ -1351,7 +1358,7 @@ index e090e90..37d1297 100644
     def read(self, amt=None):
         self._fill_buffer(amt)
         if amt is None:
-@@ -1582,9 +1862,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1869,21 @@ class PyCurlFileObject():
             self.opts.progress_obj.end(self._amount_read)
         self.fo.close()
         
@ -1374,7 +1381,7 @@ index e090e90..37d1297 100644
 
 #####################################################################
 # DEPRECATED FUNCTIONS
-@@ -1621,6 +1913,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1920,482 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
 
         
 #####################################################################
@ -1480,7 +1487,7 @@ index e090e90..37d1297 100644
 +    # list of options we pass to downloader
 +    _options = (
 +        'url', 'filename',
-+        'timeout', 'close_connection', 'keepalive',
+        'timeout', 'minrate', 'close_connection', 'keepalive',
 +        'throttle', 'bandwidth', 'range', 'reget',
 +        'user_agent', 'http_headers', 'ftp_headers',
 +        'proxy', 'prefix', 'username', 'password',
@ -2022,7 +2029,7 @@ index dad410b..7975f1b 100644
     def urlopen(self, url, **kwargs):
         kw = dict(kwargs)
 diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
-index dd07c6a..077fd99 100644
+index dd07c6a..b456a0c 100644
 --- a/urlgrabber/progress.py
 +++ b/urlgrabber/progress.py
@@ -133,8 +133,8 @@ class BaseMeter:
@ -2048,7 +2055,7 @@ index dd07c6a..077fd99 100644
 +    return tl.add(' [%-*.*s]' % (blen, blen, bar))
 +
 +def _term_add_end(tl, osize, size):
-+    if osize is not None:
+    if osize: # osize should be None or >0, but that's been broken.
 +        if size > osize: # Is ??? better? Really need something to say < vs >.
 +            return tl.add(' !!! '), True
 +        elif size != osize: