From 09166dba1eeea986a24dbeea26e0108f24b7c3c5 Mon Sep 17 00:00:00 2001 From: Seth Vidal Date: Tue, 18 Aug 2009 16:48:10 +0000 Subject: [PATCH] latest head patch: adds ssl options ,fixes POST for smolt, enhances error output --- md5-hashlib.patch | 38 --- python-urlgrabber.spec | 5 +- ...er-3.0.0-progress-C-c+serial-console.patch | 58 ---- urlgrabber-HEAD.patch | 118 +++++-- urlgrabber-extra-progress.patch | 13 - urlgrabber-file-checkfunc.patch | 16 - urlgrabber-ftp-port.patch | 16 - urlgrabber-grab-no-range.patch | 64 ---- urlgrabber-keepalive.patch | 24 -- urlgrabber-no-ssl-ok.patch | 25 -- urlgrabber-progress-ui.patch | 305 ------------------ urlgrabber-string-type.patch | 60 ---- 12 files changed, 100 insertions(+), 642 deletions(-) delete mode 100644 md5-hashlib.patch delete mode 100644 urlgrabber-3.0.0-progress-C-c+serial-console.patch delete mode 100644 urlgrabber-extra-progress.patch delete mode 100644 urlgrabber-file-checkfunc.patch delete mode 100644 urlgrabber-ftp-port.patch delete mode 100644 urlgrabber-grab-no-range.patch delete mode 100644 urlgrabber-keepalive.patch delete mode 100644 urlgrabber-no-ssl-ok.patch delete mode 100644 urlgrabber-progress-ui.patch delete mode 100644 urlgrabber-string-type.patch diff --git a/md5-hashlib.patch b/md5-hashlib.patch deleted file mode 100644 index 09d1b20..0000000 --- a/md5-hashlib.patch +++ /dev/null @@ -1,38 +0,0 @@ ---- a/urlgrabber/keepalive.py~ 2009-03-13 10:45:27.000000000 -0400 -+++ b/urlgrabber/keepalive.py 2009-03-13 10:45:27.000000000 -0400 -@@ -475,7 +475,7 @@ - keepalive_handler.close_all() - - def continuity(url): -- import md5 -+ import hashlib - format = '%25s: %s' - - # first fetch the file with the normal http handler -@@ -484,7 +484,7 @@ - fo = urllib2.urlopen(url) - foo = fo.read() - fo.close() -- m = md5.new(foo) -+ m = hashlib.md5(foo) - print format % ('normal urllib', m.hexdigest()) - - # now install the keepalive handler and try again -@@ -494,7 +494,7 @@ - fo = urllib2.urlopen(url) - foo = 
fo.read() - fo.close() -- m = md5.new(foo) -+ m = hashlib.md5(foo) - print format % ('keepalive read', m.hexdigest()) - - fo = urllib2.urlopen(url) -@@ -504,7 +504,7 @@ - if f: foo = foo + f - else: break - fo.close() -- m = md5.new(foo) -+ m = hashlib.md5(foo) - print format % ('keepalive readline', m.hexdigest()) - - def comp(N, url): diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index 641fde8..5cfacd7 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -3,7 +3,7 @@ Summary: A high-level cross-protocol url-grabber Name: python-urlgrabber Version: 3.9.0 -Release: 7%{?dist} +Release: 8%{?dist} Source0: urlgrabber-%{version}.tar.gz Patch0: urlgrabber-HEAD.patch @@ -44,6 +44,9 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/urlgrabber %changelog +* Tue Aug 18 2009 Seth Vidal - 3.9.0-8 +- ssl options, http POST string type fixes + * Mon Aug 10 2009 Seth Vidal - 3.9.0-6 - reget fixes, tmpfiles no longer made for urlopen() calls. diff --git a/urlgrabber-3.0.0-progress-C-c+serial-console.patch b/urlgrabber-3.0.0-progress-C-c+serial-console.patch deleted file mode 100644 index 5ea31b3..0000000 --- a/urlgrabber-3.0.0-progress-C-c+serial-console.patch +++ /dev/null @@ -1,58 +0,0 @@ -diff -ru urlgrabber-3.0.0-orig/urlgrabber/progress.py urlgrabber-3.0.0/urlgrabber/progress.py ---- urlgrabber-3.0.0-orig/urlgrabber/progress.py 2009-04-08 10:24:52.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/progress.py 2009-04-08 10:31:51.000000000 -0400 -@@ -34,7 +34,11 @@ - try: - buf = 'abcdefgh' - buf = fcntl.ioctl(fd, termios.TIOCGWINSZ, buf) -- return struct.unpack('hhhh', buf)[1] -+ ret = struct.unpack('hhhh', buf)[1] -+ if ret == 0: -+ return 80 -+ # Add minimum too? 
-+ return ret - except: # IOError - return 80 - -@@ -237,7 +241,7 @@ - ui_time = tl.add(' %9s' % fetime) - ui_end = tl.add(' ' * 5) - ui_rate = tl.add(' %5sB/s' % ave_dl) -- out = '\r%-*.*s%s%s%s%s' % (tl.rest(), tl.rest(), text, -+ out = '%-*.*s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, - ui_rate, ui_size, ui_time, ui_end) - else: - rtime = self.re.remaining_time() -@@ -261,7 +265,7 @@ - if (blen * frac) - int(blen * frac) >= 0.5: - bar += '-' - ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar)) -- out = '\r%-*.*s%s%s%s%s%s%s%s' % (tl.rest(), tl.rest(), text, -+ out = '%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, - ui_sofar_pc, ui_pc, ui_bar, - ui_rate, ui_size, ui_time, ui_end) - -@@ -282,7 +286,12 @@ - tl = TerminalLine(8) - ui_size = tl.add(' | %5sB' % total_size) - ui_time = tl.add(' %9s' % total_time) -- ui_end = tl.add(' ' * 5) -+ not_done = self.size is not None and amount_read != self.size -+ if not_done: -+ ui_end = tl.add(' ... ') -+ else: -+ ui_end = tl.add(' ' * 5) -+ - out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text, - ui_size, ui_time, ui_end) - self.fo.write(out) -@@ -290,7 +299,7 @@ - - # Don't add size to the sofar size until we have all of it. - # If we don't have a size, then just pretend/hope we got all of it. -- if self.size is not None and amount_read != self.size: -+ if not_done: - return - - if _text_meter_total_size: -Only in urlgrabber-3.0.0/urlgrabber: progress.py~ diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch index e5d8c17..9ce621e 100644 --- a/urlgrabber-HEAD.patch +++ b/urlgrabber-HEAD.patch @@ -1,5 +1,5 @@ diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index cf51dff..cea47e3 100644 +index cf51dff..9692219 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -402,11 +402,11 @@ import urllib @@ -29,7 +29,25 @@ index cf51dff..cea47e3 100644 ######################################################################## # functions for debugging output. 
These functions are here because they # are also part of the module initialization. -@@ -1219,7 +1226,7 @@ class URLGrabberFileObject: +@@ -859,8 +866,15 @@ class URLGrabberOptions: + self.data = None + self.urlparser = URLParser() + self.quote = None +- self.ssl_ca_cert = None +- self.ssl_context = None ++ self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb ++ self.ssl_context = None # no-op in pycurl ++ self.ssl_verify_peer = True # check peer's cert for authenticity ++ self.ssl_verify_host = True # make sure who they are and who the cert is for matches ++ self.ssl_key = None # client key ++ self.ssl_key_type = 'PEM' #(or DER) ++ self.ssl_cert = None # client cert ++ self.ssl_cert_type = 'PEM' # (or DER) ++ self.ssl_key_pass = None # password to access the key + + def __repr__(self): + return self.format() +@@ -1219,7 +1233,7 @@ class URLGrabberFileObject: self.append = 0 reget_length = 0 rt = None @@ -38,7 +56,7 @@ index cf51dff..cea47e3 100644 # we have reget turned on and we're dumping to a file try: s = os.stat(self.filename) -@@ -1450,9 +1457,11 @@ class PyCurlFileObject(): +@@ -1450,9 +1464,11 @@ class PyCurlFileObject(): self.scheme = urlparse.urlsplit(self.url)[0] self.filename = filename self.append = False @@ -51,7 +69,7 @@ index cf51dff..cea47e3 100644 self._rbuf = '' self._rbufsize = 1024*8 self._ttime = time.time() -@@ -1474,39 +1483,45 @@ class PyCurlFileObject(): +@@ -1474,39 +1490,45 @@ class PyCurlFileObject(): raise AttributeError, name def _retrieve(self, buf): @@ -129,7 +147,7 @@ index cf51dff..cea47e3 100644 def _return_hdr_obj(self): if self._parsed_hdr: -@@ -1528,11 +1543,13 @@ class PyCurlFileObject(): +@@ -1528,11 +1550,13 @@ class PyCurlFileObject(): # defaults we're always going to set @@ -145,7 +163,7 @@ index cf51dff..cea47e3 100644 if DEBUG: self.curl_obj.setopt(pycurl.VERBOSE, True) -@@ -1540,15 +1557,15 @@ class PyCurlFileObject(): +@@ -1540,19 +1564,32 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.USERAGENT, 
opts.user_agent) # maybe to be options later @@ -164,17 +182,39 @@ index cf51dff..cea47e3 100644 # ssl options if self.scheme == 'https': if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs -@@ -1607,18 +1624,33 @@ class PyCurlFileObject(): + self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) ++ self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) ++ self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) ++ self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host) ++ if opts.ssl_key: ++ self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key) ++ if opts.ssl_key_type: ++ self.curl_obj.setopt(pycurl.SSLKEYTYPE, opts.ssl_key_type) ++ if opts.ssl_cert: ++ self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert) ++ if opts.ssl_cert_type: ++ self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) ++ if opts.ssl_key_pass: ++ self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass) + + #headers: + if opts.http_headers and self.scheme in ('http', 'https'): +@@ -1590,7 +1627,7 @@ class PyCurlFileObject(): + #posts - simple - expects the fields as they are + if opts.data: + self.curl_obj.setopt(pycurl.POST, True) +- self.curl_obj.setopt(pycurl.POSTFIELDS, opts.data) ++ self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data)) + + # our url + self.curl_obj.setopt(pycurl.URL, self.url) +@@ -1607,18 +1644,51 @@ class PyCurlFileObject(): # to other URLGrabErrors from # http://curl.haxx.se/libcurl/c/libcurl-errors.html # this covers e.args[0] == 22 pretty well - which will be common +- if str(e.args[1]) == '': # fake it until you make it + code = self.http_code -+ if e.args[0] == 28: -+ err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) -+ err.url = self.url -+ raise err -+ -+ elif e.args[0] == 23 and code >= 200 and code < 299: ++ if e.args[0] == 23 and code >= 200 and code < 299: + err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e)) + err.url = self.url + # this is probably wrong but 
ultimately this is what happens @@ -184,11 +224,35 @@ index cf51dff..cea47e3 100644 + # a ctrl-c. XXXX - if there's a way of going back two raises to + # figure out what aborted the pycurl process FIXME + raise KeyboardInterrupt ++ ++ elif e.args[0] == 28: ++ err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) ++ err.url = self.url ++ raise err ++ elif e.args[0] == 35: ++ msg = _("problem making ssl connection") ++ err = URLGrabError(14, msg) ++ err.url = self.url ++ raise err ++ ++ elif e.args[0] == 58: ++ msg = _("problem with the local client certificate") ++ err = URLGrabError(14, msg) ++ err.url = self.url ++ raise err + - if str(e.args[1]) == '': # fake it until you make it ++ elif e.args[0] == 60: ++ msg = _("client cert cannot be verified or client cert incorrect") ++ err = URLGrabError(14, msg) ++ err.url = self.url ++ raise err ++ ++ elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) else: - msg = str(e.args[1]) +- msg = str(e.args[1]) ++ msg = 'PYCURL ERROR %s - "%s"' % (e.args[0], str(e.args[1])) ++ code = e.args[0] err = URLGrabError(14, msg) - err.code = self.http_code + err.code = code @@ -202,7 +266,7 @@ index cf51dff..cea47e3 100644 self.curl_obj = _curl_cache self.curl_obj.reset() # reset all old settings away, just in case # setup any ranges -@@ -1630,11 +1662,9 @@ class PyCurlFileObject(): +@@ -1630,11 +1700,9 @@ class PyCurlFileObject(): pass def _build_range(self): @@ -215,8 +279,12 @@ index cf51dff..cea47e3 100644 # we have reget turned on and we're dumping to a file try: s = os.stat(self.filename) -@@ -1729,7 +1759,7 @@ class PyCurlFileObject(): - if self.filename: +@@ -1726,10 +1794,10 @@ class PyCurlFileObject(): + if self._complete: + return + +- if self.filename: ++ if self.filename is not None: self._prog_reportname = str(self.filename) self._prog_basename = os.path.basename(self.filename) - @@ -224,7 +292,7 @@ index cf51dff..cea47e3 100644 
if self.append: mode = 'ab' else: mode = 'wb' -@@ -1746,19 +1776,23 @@ class PyCurlFileObject(): +@@ -1746,19 +1814,23 @@ class PyCurlFileObject(): else: self._prog_reportname = 'MEMORY' self._prog_basename = 'MEMORY' @@ -253,7 +321,7 @@ index cf51dff..cea47e3 100644 # set the time mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME) if mod_time != -1: -@@ -1766,7 +1800,8 @@ class PyCurlFileObject(): +@@ -1766,7 +1838,8 @@ class PyCurlFileObject(): # re open it self.fo = open(self.filename, 'r') else: @@ -263,11 +331,17 @@ index cf51dff..cea47e3 100644 self._complete = True -@@ -1838,6 +1873,7 @@ class PyCurlFileObject(): +@@ -1838,6 +1911,13 @@ class PyCurlFileObject(): downloaded += self._reget_length self.opts.progress_obj.update(downloaded) -+ ++ def _to_utf8(self, obj, errors='replace'): ++ '''convert 'unicode' to an encoded utf-8 byte string ''' ++ # stolen from yum.i18n ++ if isinstance(obj, unicode): ++ obj = obj.encode('utf-8', errors) ++ return obj ++ def read(self, amt=None): self._fill_buffer(amt) if amt is None: diff --git a/urlgrabber-extra-progress.patch b/urlgrabber-extra-progress.patch deleted file mode 100644 index f1c9df6..0000000 --- a/urlgrabber-extra-progress.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff -ru urlgrabber-3.0.0-orig/urlgrabber/progress.py urlgrabber-3.0.0/urlgrabber/progress.py ---- urlgrabber-3.0.0-orig/urlgrabber/progress.py 2008-10-14 23:46:24.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/progress.py 2008-10-14 23:57:53.000000000 -0400 -@@ -258,6 +258,8 @@ - # Make text grow a bit before we start growing the bar too - blen = 4 + tl.rest_split(8 + 8 + 4) - bar = '='*int(blen * frac) -+ if (blen * frac) - int(blen * frac) >= 0.5: -+ bar += '-' - ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar)) - out = '\r%-*.*s%s%s%s%s%s%s%s' % (tl.rest(), tl.rest(), text, - ui_sofar_pc, ui_pc, ui_bar, -Only in urlgrabber-3.0.0/urlgrabber: progress.py~ diff --git a/urlgrabber-file-checkfunc.patch b/urlgrabber-file-checkfunc.patch deleted file 
mode 100644 index 2ffac95..0000000 --- a/urlgrabber-file-checkfunc.patch +++ /dev/null @@ -1,16 +0,0 @@ ---- foo/urlgrabber/grabber.py~ 2009-03-09 12:01:21.000000000 -0400 -+++ foo/urlgrabber/grabber.py 2009-03-09 12:01:21.000000000 -0400 -@@ -913,6 +913,13 @@ - raise URLGrabError(3, - _('Not a normal file: %s') % (path, )) - elif not opts.range: -+ if not opts.checkfunc is None: -+ cb_func, cb_args, cb_kwargs = \ -+ self._make_callback(opts.checkfunc) -+ obj = CallbackObject() -+ obj.filename = path -+ obj.url = url -+ apply(cb_func, (obj, )+cb_args, cb_kwargs) - return path - - def retryfunc(opts, url, filename): diff --git a/urlgrabber-ftp-port.patch b/urlgrabber-ftp-port.patch deleted file mode 100644 index 7382f3c..0000000 --- a/urlgrabber-ftp-port.patch +++ /dev/null @@ -1,16 +0,0 @@ -Index: urlgrabber/byterange.py -=================================================================== -RCS file: /home/groups/urlgrabber/cvs-root/urlgrabber/urlgrabber/byterange.py,v -retrieving revision 1.12 -diff -u -r1.12 byterange.py ---- urlgrabber/byterange.py 20 Jul 2006 20:15:58 -0000 1.12 -+++ urlgrabber/byterange.py 13 Mar 2008 18:48:56 -0000 -@@ -272,6 +272,8 @@ - host, port = splitport(host) - if port is None: - port = ftplib.FTP_PORT -+ else: -+ port = int(port) - - # username/password handling - user, host = splituser(host) diff --git a/urlgrabber-grab-no-range.patch b/urlgrabber-grab-no-range.patch deleted file mode 100644 index 32a3d05..0000000 --- a/urlgrabber-grab-no-range.patch +++ /dev/null @@ -1,64 +0,0 @@ -diff -ru urlgrabber-3.0.0-orig/urlgrabber/grabber.py urlgrabber-3.0.0/urlgrabber/grabber.py ---- urlgrabber-3.0.0-orig/urlgrabber/grabber.py 2008-05-02 17:18:14.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/grabber.py 2008-05-02 17:30:35.000000000 -0400 -@@ -400,6 +400,11 @@ - HTTPException = None - - try: -+ from httplib import HTTPResponse -+except ImportError, msg: -+ HTTPResponse = None -+ -+try: - # This is a convenient way to make keepalive 
optional. - # Just rename the module so it can't be imported. - import keepalive -@@ -1065,8 +1070,21 @@ - req = urllib2.Request(self.url, self.opts.data) # build request object - self._add_headers(req) # add misc headers that we need - self._build_range(req) # take care of reget and byterange stuff -+ -+ def _checkNoReget(fo): -+ # HTTP can disallow Range requests -+ -+ if self.opts.reget is None: -+ return False -+ if 'Range' not in req.headers: -+ return False -+ if not isinstance(fo, HTTPResponse): -+ return False -+ -+ return fo.code != 206 - - fo, hdr = self._make_request(req, opener) -+ fetch_again = 0 - if self.reget_time and self.opts.reget == 'check_timestamp': - # do this if we have a local file with known timestamp AND - # we're in check_timestamp reget mode. -@@ -1078,14 +1096,26 @@ - except (TypeError,): - fetch_again = 1 - -+ if _checkNoReget(fo): # doing a "reget" didn't work, so fixup -+ fetch_again = 0 -+ self.opts.reget = None -+ self.append = 0 -+ self._amount_read = 0 -+ -+ if True: - if fetch_again: - # the server version is newer than the (incomplete) local - # version, so we should abandon the version we're getting - # and fetch the whole thing again. - fo.close() -+ # Without this we'll have to read all of the previous request -+ # data. For "large" requests, this is very bad. 
-+ fo.close_connection() - self.opts.reget = None - del req.headers['Range'] - self._build_range(req) -+ # This doesn't get reset in _build_range() *sigh* -+ self._amount_read = 0 - fo, hdr = self._make_request(req, opener) - - (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url) diff --git a/urlgrabber-keepalive.patch b/urlgrabber-keepalive.patch deleted file mode 100644 index c051a01..0000000 --- a/urlgrabber-keepalive.patch +++ /dev/null @@ -1,24 +0,0 @@ ---- urlgrabber/grabber.py 22 Sep 2006 00:58:05 -0000 1.48 -+++ urlgrabber/grabber.py 5 Dec 2006 23:48:51 -0000 -@@ -1198,13 +1198,21 @@ - """dump the file to self.filename.""" - if self.append: new_fo = open(self.filename, 'ab') - else: new_fo = open(self.filename, 'wb') -+ try: -+ # if we have a known range, only try to read that much. -+ (low, high) = self.opts.range -+ amount = high - low -+ except TypeError, ValueError: -+ amount = None - bs = 1024*8 - size = 0 - -+ if amount is not None: bs = min(bs, amount - size) - block = self.read(bs) - size = size + len(block) - while block: - new_fo.write(block) -+ if amount is not None: bs = min(bs, amount - size) - block = self.read(bs) - size = size + len(block) - diff --git a/urlgrabber-no-ssl-ok.patch b/urlgrabber-no-ssl-ok.patch deleted file mode 100644 index 9885f97..0000000 --- a/urlgrabber-no-ssl-ok.patch +++ /dev/null @@ -1,25 +0,0 @@ -diff -rup urlgrabber-3.0.0-orig/urlgrabber/keepalive.py urlgrabber-3.0.0/urlgrabber/keepalive.py ---- urlgrabber-3.0.0-orig/urlgrabber/keepalive.py 2006-07-20 16:15:58.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/keepalive.py 2008-07-10 17:37:06.000000000 -0400 -@@ -328,12 +328,16 @@ class HTTPHandler(KeepAliveHandler, urll - def http_open(self, req): - return self.do_open(HTTPConnection, req) - --class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler): -- def __init__(self): -- KeepAliveHandler.__init__(self) -+# If SSL isn't available, don't make urlgrabber completely unusable -+try: -+ class 
HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler): -+ def __init__(self): -+ KeepAliveHandler.__init__(self) - -- def https_open(self, req): -- return self.do_open(HTTPSConnection, req) -+ def https_open(self, req): -+ return self.do_open(HTTPSConnection, req) -+except: -+ pass - - class HTTPResponse(httplib.HTTPResponse): - # we need to subclass HTTPResponse in order to diff --git a/urlgrabber-progress-ui.patch b/urlgrabber-progress-ui.patch deleted file mode 100644 index a2f0f26..0000000 --- a/urlgrabber-progress-ui.patch +++ /dev/null @@ -1,305 +0,0 @@ -diff -rup urlgrabber-3.0.0-orig/urlgrabber/progress.py urlgrabber-3.0.0/urlgrabber/progress.py ---- urlgrabber-3.0.0-orig/urlgrabber/progress.py 2008-06-16 00:48:52.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/progress.py 2008-06-16 00:49:25.000000000 -0400 -@@ -24,7 +24,74 @@ import time - import math - import thread - import types -- -+import fcntl -+import struct -+import termios -+ -+# Code from http://mail.python.org/pipermail/python-list/2000-May/033365.html -+def terminal_width(fd=1): -+ """ Get the real terminal width """ -+ try: -+ buf = 'abcdefgh' -+ buf = fcntl.ioctl(fd, termios.TIOCGWINSZ, buf) -+ return struct.unpack('hhhh', buf)[1] -+ except: # IOError -+ return 80 -+ -+_term_width_val = None -+_term_width_last = None -+def terminal_width_cached(fd=1, cache_timeout=1.000): -+ """ Get the real terminal width, but cache it for a bit. """ -+ global _term_width_val -+ global _term_width_last -+ -+ now = time.time() -+ if _term_width_val is None or (now - _term_width_last) > cache_timeout: -+ _term_width_val = terminal_width(fd) -+ _term_width_last = now -+ return _term_width_val -+ -+class TerminalLine: -+ """ Help create dynamic progress bars, uses terminal_width_cached(). 
""" -+ -+ def __init__(self, min_rest=0, beg_len=None, fd=1, cache_timeout=1.000): -+ if beg_len is None: -+ beg_len = min_rest -+ self._min_len = min_rest -+ self._llen = terminal_width_cached(fd, cache_timeout) -+ if self._llen < beg_len: -+ self._llen = beg_len -+ self._fin = False -+ -+ def __len__(self): -+ """ Usable length for elements. """ -+ return self._llen - self._min_len -+ -+ def rest_split(self, fixed, elements=2): -+ """ After a fixed length, split the rest of the line length among -+ a number of different elements (default=2). """ -+ if self._llen < fixed: -+ return 0 -+ return (self._llen - fixed) / elements -+ -+ def add(self, element, full_len=None): -+ """ If there is room left in the line, above min_len, add element. -+ Note that as soon as one add fails all the rest will fail too. """ -+ -+ if full_len is None: -+ full_len = len(element) -+ if len(self) < full_len: -+ self._fin = True -+ if self._fin: -+ return '' -+ -+ self._llen -= len(element) -+ return element -+ -+ def rest(self): -+ """ Current rest of line, same as .rest_split(fixed=0, elements=1). """ -+ return self._llen -+ - class BaseMeter: - def __init__(self): - self.update_period = 0.3 # seconds -@@ -84,6 +151,64 @@ class BaseMeter: - def _do_end(self, amount_read, now=None): - pass - -+# This is kind of a hack, but progress is gotten from grabber which doesn't -+# know about the total size to download. So we do this so we can get the data -+# out of band here. This will be "fixed" one way or anther soon. -+_text_meter_total_size = 0 -+_text_meter_sofar_size = 0 -+def text_meter_total_size(size, downloaded=0): -+ global _text_meter_total_size -+ global _text_meter_sofar_size -+ _text_meter_total_size = size -+ _text_meter_sofar_size = downloaded -+ -+# -+# update: No size (minimal: 17 chars) -+# ----------------------------------- -+# | -+# 8-48 1 8 3 6 1 9 5 -+# -+# Order: 1. + (17) -+# 2. + (10, total: 27) -+# 3. + ( 5, total: 32) -+# 4. 
+ ( 9, total: 41) -+# -+# update: Size, Single file -+# ------------------------- -+# | ETA -+# 8-25 1 3-4 1 6-16 1 8 3 6 1 9 1 3 1 -+# -+# Order: 1. + (17) -+# 2. + (10, total: 27) -+# 3. +ETA ( 5, total: 32) -+# 4. + ( 4, total: 36) -+# 5. + ( 9, total: 45) -+# 6. + ( 7, total: 52) -+# -+# update: Size, All files -+# ----------------------- -+# | ETA -+# 8-22 1 5-7 1 3-4 1 6-12 1 8 3 6 1 9 1 3 1 -+# -+# Order: 1. + (17) -+# 2. + (10, total: 27) -+# 3. +ETA ( 5, total: 32) -+# 4. + ( 5, total: 37) -+# 4. + ( 4, total: 41) -+# 5. + ( 9, total: 50) -+# 6. + ( 7, total: 57) -+# -+# end -+# --- -+# | -+# 8-56 3 6 1 9 5 -+# -+# Order: 1. ( 8) -+# 2. + ( 9, total: 17) -+# 3. + (10, total: 27) -+# 4. + ( 5, total: 32) -+# -+ - class TextMeter(BaseMeter): - def __init__(self, fo=sys.stderr): - BaseMeter.__init__(self) -@@ -98,38 +223,80 @@ class TextMeter(BaseMeter): - text = self.text - else: - text = self.basename -+ -+ ave_dl = format_number(self.re.average_rate()) -+ sofar_size = None -+ if _text_meter_total_size: -+ sofar_size = _text_meter_sofar_size + amount_read -+ sofar_pc = (sofar_size * 100) / _text_meter_total_size -+ -+ # Include text + ui_rate in minimal -+ tl = TerminalLine(8, 8+1+8) -+ ui_size = tl.add(' | %5sB' % fread) - if self.size is None: -- out = '\r%-60.60s %5sB %s ' % \ -- (text, fread, fetime) -+ ui_time = tl.add(' %9s' % fetime) -+ ui_end = tl.add(' ' * 5) -+ ui_rate = tl.add(' %5sB/s' % ave_dl) -+ out = '\r%-*.*s%s%s%s%s' % (tl.rest(), tl.rest(), text, -+ ui_rate, ui_size, ui_time, ui_end) - else: - rtime = self.re.remaining_time() - frtime = format_time(rtime) - frac = self.re.fraction_read() -- bar = '='*int(25 * frac) - -- out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \ -- (text, frac*100, bar, fread, frtime) -+ ui_time = tl.add(' %9s' % frtime) -+ ui_end = tl.add(' ETA ') -+ -+ if sofar_size is None: -+ ui_sofar_pc = '' -+ else: -+ ui_sofar_pc = tl.add(' (%i%%)' % sofar_pc, -+ full_len=len(" (100%)")) -+ -+ ui_pc = tl.add(' %2i%%' % 
(frac*100)) -+ ui_rate = tl.add(' %5sB/s' % ave_dl) -+ # Make text grow a bit before we start growing the bar too -+ blen = 4 + tl.rest_split(8 + 8 + 4) -+ bar = '='*int(blen * frac) -+ ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar)) -+ out = '\r%-*.*s%s%s%s%s%s%s%s' % (tl.rest(), tl.rest(), text, -+ ui_sofar_pc, ui_pc, ui_bar, -+ ui_rate, ui_size, ui_time, ui_end) - - self.fo.write(out) - self.fo.flush() - - def _do_end(self, amount_read, now=None): -+ global _text_meter_total_size -+ global _text_meter_sofar_size -+ - total_time = format_time(self.re.elapsed_time()) - total_size = format_number(amount_read) - if self.text is not None: - text = self.text - else: - text = self.basename -- if self.size is None: -- out = '\r%-60.60s %5sB %s ' % \ -- (text, total_size, total_time) -- else: -- bar = '='*25 -- out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \ -- (text, 100, bar, total_size, total_time) -- self.fo.write(out + '\n') -+ -+ tl = TerminalLine(8) -+ ui_size = tl.add(' | %5sB' % total_size) -+ ui_time = tl.add(' %9s' % total_time) -+ ui_end = tl.add(' ' * 5) -+ out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text, -+ ui_size, ui_time, ui_end) -+ self.fo.write(out) - self.fo.flush() - -+ # Don't add size to the sofar size until we have all of it. -+ # If we don't have a size, then just pretend/hope we got all of it. 
-+ if self.size is not None and amount_read != self.size: -+ return -+ -+ if _text_meter_total_size: -+ _text_meter_sofar_size += amount_read -+ if _text_meter_total_size <= _text_meter_sofar_size: -+ _text_meter_total_size = 0 -+ _text_meter_sofar_size = 0 -+ - text_progress_meter = TextMeter - - class MultiFileHelper(BaseMeter): -@@ -397,10 +564,12 @@ class RateEstimator: - #print 'times', now, self.last_update_time - time_diff = now - self.last_update_time - read_diff = amount_read - self.last_amount_read -- self.last_update_time = now -+ # First update, on reget is the file size -+ if self.last_amount_read: -+ self.last_update_time = now -+ self.ave_rate = self._temporal_rolling_ave(\ -+ time_diff, read_diff, self.ave_rate, self.timescale) - self.last_amount_read = amount_read -- self.ave_rate = self._temporal_rolling_ave(\ -- time_diff, read_diff, self.ave_rate, self.timescale) - #print 'results', time_diff, read_diff, self.ave_rate - - ##################################################################### -@@ -529,3 +698,49 @@ def format_number(number, SI=0, space=' - format = '%.0f%s%s' - - return(format % (float(number or 0), space, symbols[depth])) -+ -+def _tst(fn, cur, tot, beg, size, *args): -+ tm = TextMeter() -+ text = "(%d/%d): %s" % (cur, tot, fn) -+ tm.start(fn, "http://www.example.com/path/to/fn/" + fn, fn, size, text=text) -+ num = beg -+ off = 0 -+ for (inc, delay) in args: -+ off += 1 -+ while num < ((size * off) / len(args)): -+ num += inc -+ tm.update(num) -+ time.sleep(delay) -+ tm.end(size) -+ -+if __name__ == "__main__": -+ # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28 -+ # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08 -+ if len(sys.argv) >= 2 and sys.argv[1] == 'total': -+ text_meter_total_size(1000 + 10000 + 10000 + 1000000 + 1000000 + -+ 1000000 + 10000 + 10000 + 10000 + 1000000) -+ _tst("sm-1.0.0-1.fc8.i386.rpm", 1, 10, 0, 1000, -+ (10, 0.2), (10, 0.1), (100, 0.25)) -+ _tst("s-1.0.1-1.fc8.i386.rpm", 2, 
10, 0, 10000, -+ (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25)) -+ _tst("m-1.0.1-2.fc8.i386.rpm", 3, 10, 5000, 10000, -+ (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25)) -+ _tst("large-file-name-Foo-11.8.7-4.5.6.1.fc8.x86_64.rpm", 4, 10, 0, 1000000, -+ (1000, 0.2), (1000, 0.1), (10000, 0.1)) -+ _tst("large-file-name-Foo2-11.8.7-4.5.6.2.fc8.x86_64.rpm", 5, 10, -+ 500001, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1)) -+ _tst("large-file-name-Foo3-11.8.7-4.5.6.3.fc8.x86_64.rpm", 6, 10, -+ 750002, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1)) -+ _tst("large-file-name-Foo4-10.8.7-4.5.6.1.fc8.x86_64.rpm", 7, 10, 0, 10000, -+ (100, 0.1)) -+ _tst("large-file-name-Foo5-10.8.7-4.5.6.2.fc8.x86_64.rpm", 8, 10, -+ 5001, 10000, (100, 0.1)) -+ _tst("large-file-name-Foo6-10.8.7-4.5.6.3.fc8.x86_64.rpm", 9, 10, -+ 7502, 10000, (1, 0.1)) -+ _tst("large-file-name-Foox-9.8.7-4.5.6.1.fc8.x86_64.rpm", 10, 10, -+ 0, 1000000, (10, 0.5), -+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1), -+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1), -+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1), -+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1), -+ (100000, 0.1), (1, 0.1)) -Only in urlgrabber-3.0.0/urlgrabber: progress.py.orig diff --git a/urlgrabber-string-type.patch b/urlgrabber-string-type.patch deleted file mode 100644 index bc96da7..0000000 --- a/urlgrabber-string-type.patch +++ /dev/null @@ -1,60 +0,0 @@ -diff -up urlgrabber-3.0.0/urlgrabber/progress.py.type urlgrabber-3.0.0/urlgrabber/progress.py ---- urlgrabber-3.0.0/urlgrabber/progress.py.type 2007-10-10 11:34:55.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/progress.py 2007-10-10 11:36:50.000000000 -0400 -@@ -23,6 +23,7 @@ import sys - import time - import math - import thread -+import types - - class BaseMeter: - def __init__(self): -@@ -343,7 +344,7 @@ class TextMultiFileMeter(MultiFileMeter) - try: - format = "%-30.30s %6.6s %s" - fn = meter.basename -- if type(message) in (type(''), 
type(u'')): -+ if type(message) in types.StringTypes: - message = message.splitlines() - if not message: message = [''] - out = '%-79s' % (format % (fn, 'FAILED', message[0] or '')) -diff -up urlgrabber-3.0.0/urlgrabber/mirror.py.type urlgrabber-3.0.0/urlgrabber/mirror.py ---- urlgrabber-3.0.0/urlgrabber/mirror.py.type 2007-10-10 11:35:22.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/mirror.py 2007-10-10 11:36:14.000000000 -0400 -@@ -90,6 +90,7 @@ CUSTOMIZATION - - import random - import thread # needed for locking to make this threadsafe -+import types - - from grabber import URLGrabError, CallbackObject, DEBUG - -@@ -266,7 +267,7 @@ class MirrorGroup: - def _parse_mirrors(self, mirrors): - parsed_mirrors = [] - for m in mirrors: -- if type(m) == type(''): m = {'mirror': m} -+ if type(m) in types.StringTypes: m = {'mirror': m} - parsed_mirrors.append(m) - return parsed_mirrors - -diff -up urlgrabber-3.0.0/urlgrabber/grabber.py.type urlgrabber-3.0.0/urlgrabber/grabber.py ---- urlgrabber-3.0.0/urlgrabber/grabber.py.type 2007-10-10 11:34:50.000000000 -0400 -+++ urlgrabber-3.0.0/urlgrabber/grabber.py 2007-10-10 11:35:51.000000000 -0400 -@@ -372,6 +372,7 @@ import sys - import urlparse - import rfc822 - import time -+import types - import string - import urllib - import urllib2 -@@ -1128,7 +1129,7 @@ class URLGrabberFileObject: - self.append = 0 - reget_length = 0 - rt = None -- if have_range and self.opts.reget and type(self.filename) == type(''): -+ if have_range and self.opts.reget and type(self.filename) in types.StringTypes: - # we have reget turned on and we're dumping to a file - try: - s = os.stat(self.filename)