latest head patch: adds ssl options, fixes POST for smolt, enhances error output
epel9
Seth Vidal 16 years ago
parent b78a1772ac
commit 09166dba1e

@ -1,38 +0,0 @@
--- a/urlgrabber/keepalive.py~ 2009-03-13 10:45:27.000000000 -0400
+++ b/urlgrabber/keepalive.py 2009-03-13 10:45:27.000000000 -0400
@@ -475,7 +475,7 @@
keepalive_handler.close_all()
def continuity(url):
- import md5
+ import hashlib
format = '%25s: %s'
# first fetch the file with the normal http handler
@@ -484,7 +484,7 @@
fo = urllib2.urlopen(url)
foo = fo.read()
fo.close()
- m = md5.new(foo)
+ m = hashlib.md5(foo)
print format % ('normal urllib', m.hexdigest())
# now install the keepalive handler and try again
@@ -494,7 +494,7 @@
fo = urllib2.urlopen(url)
foo = fo.read()
fo.close()
- m = md5.new(foo)
+ m = hashlib.md5(foo)
print format % ('keepalive read', m.hexdigest())
fo = urllib2.urlopen(url)
@@ -504,7 +504,7 @@
if f: foo = foo + f
else: break
fo.close()
- m = md5.new(foo)
+ m = hashlib.md5(foo)
print format % ('keepalive readline', m.hexdigest())
def comp(N, url):

@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber Name: python-urlgrabber
Version: 3.9.0 Version: 3.9.0
Release: 7%{?dist} Release: 8%{?dist}
Source0: urlgrabber-%{version}.tar.gz Source0: urlgrabber-%{version}.tar.gz
Patch0: urlgrabber-HEAD.patch Patch0: urlgrabber-HEAD.patch
@ -44,6 +44,9 @@ rm -rf $RPM_BUILD_ROOT
%{_bindir}/urlgrabber %{_bindir}/urlgrabber
%changelog %changelog
* Tue Aug 18 2009 Seth Vidal <skvidal at fedoraproject.org> - 3.9.0-8
- ssl options, http POST string type fixes
* Mon Aug 10 2009 Seth Vidal <skvidal at fedoraproject.org> - 3.9.0-6 * Mon Aug 10 2009 Seth Vidal <skvidal at fedoraproject.org> - 3.9.0-6
- reget fixes, tmpfiles no longer made for urlopen() calls. - reget fixes, tmpfiles no longer made for urlopen() calls.

@ -1,58 +0,0 @@
diff -ru urlgrabber-3.0.0-orig/urlgrabber/progress.py urlgrabber-3.0.0/urlgrabber/progress.py
--- urlgrabber-3.0.0-orig/urlgrabber/progress.py 2009-04-08 10:24:52.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/progress.py 2009-04-08 10:31:51.000000000 -0400
@@ -34,7 +34,11 @@
try:
buf = 'abcdefgh'
buf = fcntl.ioctl(fd, termios.TIOCGWINSZ, buf)
- return struct.unpack('hhhh', buf)[1]
+ ret = struct.unpack('hhhh', buf)[1]
+ if ret == 0:
+ return 80
+ # Add minimum too?
+ return ret
except: # IOError
return 80
@@ -237,7 +241,7 @@
ui_time = tl.add(' %9s' % fetime)
ui_end = tl.add(' ' * 5)
ui_rate = tl.add(' %5sB/s' % ave_dl)
- out = '\r%-*.*s%s%s%s%s' % (tl.rest(), tl.rest(), text,
+ out = '%-*.*s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
ui_rate, ui_size, ui_time, ui_end)
else:
rtime = self.re.remaining_time()
@@ -261,7 +265,7 @@
if (blen * frac) - int(blen * frac) >= 0.5:
bar += '-'
ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar))
- out = '\r%-*.*s%s%s%s%s%s%s%s' % (tl.rest(), tl.rest(), text,
+ out = '%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
ui_sofar_pc, ui_pc, ui_bar,
ui_rate, ui_size, ui_time, ui_end)
@@ -282,7 +286,12 @@
tl = TerminalLine(8)
ui_size = tl.add(' | %5sB' % total_size)
ui_time = tl.add(' %9s' % total_time)
- ui_end = tl.add(' ' * 5)
+ not_done = self.size is not None and amount_read != self.size
+ if not_done:
+ ui_end = tl.add(' ... ')
+ else:
+ ui_end = tl.add(' ' * 5)
+
out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text,
ui_size, ui_time, ui_end)
self.fo.write(out)
@@ -290,7 +299,7 @@
# Don't add size to the sofar size until we have all of it.
# If we don't have a size, then just pretend/hope we got all of it.
- if self.size is not None and amount_read != self.size:
+ if not_done:
return
if _text_meter_total_size:
Only in urlgrabber-3.0.0/urlgrabber: progress.py~

@ -1,5 +1,5 @@
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index cf51dff..cea47e3 100644 index cf51dff..9692219 100644
--- a/urlgrabber/grabber.py --- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py
@@ -402,11 +402,11 @@ import urllib @@ -402,11 +402,11 @@ import urllib
@ -29,7 +29,25 @@ index cf51dff..cea47e3 100644
######################################################################## ########################################################################
# functions for debugging output. These functions are here because they # functions for debugging output. These functions are here because they
# are also part of the module initialization. # are also part of the module initialization.
@@ -1219,7 +1226,7 @@ class URLGrabberFileObject: @@ -859,8 +866,15 @@ class URLGrabberOptions:
self.data = None
self.urlparser = URLParser()
self.quote = None
- self.ssl_ca_cert = None
- self.ssl_context = None
+ self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
+ self.ssl_context = None # no-op in pycurl
+ self.ssl_verify_peer = True # check peer's cert for authenticity
+ self.ssl_verify_host = True # make sure who they are and who the cert is for matches
+ self.ssl_key = None # client key
+ self.ssl_key_type = 'PEM' #(or DER)
+ self.ssl_cert = None # client cert
+ self.ssl_cert_type = 'PEM' # (or DER)
+ self.ssl_key_pass = None # password to access the key
def __repr__(self):
return self.format()
@@ -1219,7 +1233,7 @@ class URLGrabberFileObject:
self.append = 0 self.append = 0
reget_length = 0 reget_length = 0
rt = None rt = None
@ -38,7 +56,7 @@ index cf51dff..cea47e3 100644
# we have reget turned on and we're dumping to a file # we have reget turned on and we're dumping to a file
try: try:
s = os.stat(self.filename) s = os.stat(self.filename)
@@ -1450,9 +1457,11 @@ class PyCurlFileObject(): @@ -1450,9 +1464,11 @@ class PyCurlFileObject():
self.scheme = urlparse.urlsplit(self.url)[0] self.scheme = urlparse.urlsplit(self.url)[0]
self.filename = filename self.filename = filename
self.append = False self.append = False
@ -51,7 +69,7 @@ index cf51dff..cea47e3 100644
self._rbuf = '' self._rbuf = ''
self._rbufsize = 1024*8 self._rbufsize = 1024*8
self._ttime = time.time() self._ttime = time.time()
@@ -1474,39 +1483,45 @@ class PyCurlFileObject(): @@ -1474,39 +1490,45 @@ class PyCurlFileObject():
raise AttributeError, name raise AttributeError, name
def _retrieve(self, buf): def _retrieve(self, buf):
@ -129,7 +147,7 @@ index cf51dff..cea47e3 100644
def _return_hdr_obj(self): def _return_hdr_obj(self):
if self._parsed_hdr: if self._parsed_hdr:
@@ -1528,11 +1543,13 @@ class PyCurlFileObject(): @@ -1528,11 +1550,13 @@ class PyCurlFileObject():
# defaults we're always going to set # defaults we're always going to set
@ -145,7 +163,7 @@ index cf51dff..cea47e3 100644
if DEBUG: if DEBUG:
self.curl_obj.setopt(pycurl.VERBOSE, True) self.curl_obj.setopt(pycurl.VERBOSE, True)
@@ -1540,15 +1557,15 @@ class PyCurlFileObject(): @@ -1540,19 +1564,32 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent) self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
# maybe to be options later # maybe to be options later
@ -164,17 +182,39 @@ index cf51dff..cea47e3 100644
# ssl options # ssl options
if self.scheme == 'https': if self.scheme == 'https':
if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs
@@ -1607,18 +1624,33 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
+ self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
+ self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
+ self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host)
+ if opts.ssl_key:
+ self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key)
+ if opts.ssl_key_type:
+ self.curl_obj.setopt(pycurl.SSLKEYTYPE, opts.ssl_key_type)
+ if opts.ssl_cert:
+ self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert)
+ if opts.ssl_cert_type:
+ self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
+ if opts.ssl_key_pass:
+ self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass)
#headers:
if opts.http_headers and self.scheme in ('http', 'https'):
@@ -1590,7 +1627,7 @@ class PyCurlFileObject():
#posts - simple - expects the fields as they are
if opts.data:
self.curl_obj.setopt(pycurl.POST, True)
- self.curl_obj.setopt(pycurl.POSTFIELDS, opts.data)
+ self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
@@ -1607,18 +1644,51 @@ class PyCurlFileObject():
# to other URLGrabErrors from # to other URLGrabErrors from
# http://curl.haxx.se/libcurl/c/libcurl-errors.html # http://curl.haxx.se/libcurl/c/libcurl-errors.html
# this covers e.args[0] == 22 pretty well - which will be common # this covers e.args[0] == 22 pretty well - which will be common
- if str(e.args[1]) == '': # fake it until you make it
+ code = self.http_code + code = self.http_code
+ if e.args[0] == 28: + if e.args[0] == 23 and code >= 200 and code < 299:
+ err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
+ err.url = self.url
+ raise err
+
+ elif e.args[0] == 23 and code >= 200 and code < 299:
+ err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e)) + err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
+ err.url = self.url + err.url = self.url
+ # this is probably wrong but ultimately this is what happens + # this is probably wrong but ultimately this is what happens
@ -184,11 +224,35 @@ index cf51dff..cea47e3 100644
+ # a ctrl-c. XXXX - if there's a way of going back two raises to + # a ctrl-c. XXXX - if there's a way of going back two raises to
+ # figure out what aborted the pycurl process FIXME + # figure out what aborted the pycurl process FIXME
+ raise KeyboardInterrupt + raise KeyboardInterrupt
+
+ elif e.args[0] == 28:
+ err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
+ err.url = self.url
+ raise err
+ elif e.args[0] == 35:
+ msg = _("problem making ssl connection")
+ err = URLGrabError(14, msg)
+ err.url = self.url
+ raise err
+
+ elif e.args[0] == 58:
+ msg = _("problem with the local client certificate")
+ err = URLGrabError(14, msg)
+ err.url = self.url
+ raise err
+ +
if str(e.args[1]) == '': # fake it until you make it + elif e.args[0] == 60:
+ msg = _("client cert cannot be verified or client cert incorrect")
+ err = URLGrabError(14, msg)
+ err.url = self.url
+ raise err
+
+ elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
else: else:
msg = str(e.args[1]) - msg = str(e.args[1])
+ msg = 'PYCURL ERROR %s - "%s"' % (e.args[0], str(e.args[1]))
+ code = e.args[0]
err = URLGrabError(14, msg) err = URLGrabError(14, msg)
- err.code = self.http_code - err.code = self.http_code
+ err.code = code + err.code = code
@ -202,7 +266,7 @@ index cf51dff..cea47e3 100644
self.curl_obj = _curl_cache self.curl_obj = _curl_cache
self.curl_obj.reset() # reset all old settings away, just in case self.curl_obj.reset() # reset all old settings away, just in case
# setup any ranges # setup any ranges
@@ -1630,11 +1662,9 @@ class PyCurlFileObject(): @@ -1630,11 +1700,9 @@ class PyCurlFileObject():
pass pass
def _build_range(self): def _build_range(self):
@ -215,8 +279,12 @@ index cf51dff..cea47e3 100644
# we have reget turned on and we're dumping to a file # we have reget turned on and we're dumping to a file
try: try:
s = os.stat(self.filename) s = os.stat(self.filename)
@@ -1729,7 +1759,7 @@ class PyCurlFileObject(): @@ -1726,10 +1794,10 @@ class PyCurlFileObject():
if self.filename: if self._complete:
return
- if self.filename:
+ if self.filename is not None:
self._prog_reportname = str(self.filename) self._prog_reportname = str(self.filename)
self._prog_basename = os.path.basename(self.filename) self._prog_basename = os.path.basename(self.filename)
- -
@ -224,7 +292,7 @@ index cf51dff..cea47e3 100644
if self.append: mode = 'ab' if self.append: mode = 'ab'
else: mode = 'wb' else: mode = 'wb'
@@ -1746,19 +1776,23 @@ class PyCurlFileObject(): @@ -1746,19 +1814,23 @@ class PyCurlFileObject():
else: else:
self._prog_reportname = 'MEMORY' self._prog_reportname = 'MEMORY'
self._prog_basename = 'MEMORY' self._prog_basename = 'MEMORY'
@ -253,7 +321,7 @@ index cf51dff..cea47e3 100644
# set the time # set the time
mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME) mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
if mod_time != -1: if mod_time != -1:
@@ -1766,7 +1800,8 @@ class PyCurlFileObject(): @@ -1766,7 +1838,8 @@ class PyCurlFileObject():
# re open it # re open it
self.fo = open(self.filename, 'r') self.fo = open(self.filename, 'r')
else: else:
@ -263,11 +331,17 @@ index cf51dff..cea47e3 100644
self._complete = True self._complete = True
@@ -1838,6 +1873,7 @@ class PyCurlFileObject(): @@ -1838,6 +1911,13 @@ class PyCurlFileObject():
downloaded += self._reget_length downloaded += self._reget_length
self.opts.progress_obj.update(downloaded) self.opts.progress_obj.update(downloaded)
+ + def _to_utf8(self, obj, errors='replace'):
+ '''convert 'unicode' to an encoded utf-8 byte string '''
+ # stolen from yum.i18n
+ if isinstance(obj, unicode):
+ obj = obj.encode('utf-8', errors)
+ return obj
+
def read(self, amt=None): def read(self, amt=None):
self._fill_buffer(amt) self._fill_buffer(amt)
if amt is None: if amt is None:

@ -1,13 +0,0 @@
diff -ru urlgrabber-3.0.0-orig/urlgrabber/progress.py urlgrabber-3.0.0/urlgrabber/progress.py
--- urlgrabber-3.0.0-orig/urlgrabber/progress.py 2008-10-14 23:46:24.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/progress.py 2008-10-14 23:57:53.000000000 -0400
@@ -258,6 +258,8 @@
# Make text grow a bit before we start growing the bar too
blen = 4 + tl.rest_split(8 + 8 + 4)
bar = '='*int(blen * frac)
+ if (blen * frac) - int(blen * frac) >= 0.5:
+ bar += '-'
ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar))
out = '\r%-*.*s%s%s%s%s%s%s%s' % (tl.rest(), tl.rest(), text,
ui_sofar_pc, ui_pc, ui_bar,
Only in urlgrabber-3.0.0/urlgrabber: progress.py~

@ -1,16 +0,0 @@
--- foo/urlgrabber/grabber.py~ 2009-03-09 12:01:21.000000000 -0400
+++ foo/urlgrabber/grabber.py 2009-03-09 12:01:21.000000000 -0400
@@ -913,6 +913,13 @@
raise URLGrabError(3,
_('Not a normal file: %s') % (path, ))
elif not opts.range:
+ if not opts.checkfunc is None:
+ cb_func, cb_args, cb_kwargs = \
+ self._make_callback(opts.checkfunc)
+ obj = CallbackObject()
+ obj.filename = path
+ obj.url = url
+ apply(cb_func, (obj, )+cb_args, cb_kwargs)
return path
def retryfunc(opts, url, filename):

@ -1,16 +0,0 @@
Index: urlgrabber/byterange.py
===================================================================
RCS file: /home/groups/urlgrabber/cvs-root/urlgrabber/urlgrabber/byterange.py,v
retrieving revision 1.12
diff -u -r1.12 byterange.py
--- urlgrabber/byterange.py 20 Jul 2006 20:15:58 -0000 1.12
+++ urlgrabber/byterange.py 13 Mar 2008 18:48:56 -0000
@@ -272,6 +272,8 @@
host, port = splitport(host)
if port is None:
port = ftplib.FTP_PORT
+ else:
+ port = int(port)
# username/password handling
user, host = splituser(host)

@ -1,64 +0,0 @@
diff -ru urlgrabber-3.0.0-orig/urlgrabber/grabber.py urlgrabber-3.0.0/urlgrabber/grabber.py
--- urlgrabber-3.0.0-orig/urlgrabber/grabber.py 2008-05-02 17:18:14.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/grabber.py 2008-05-02 17:30:35.000000000 -0400
@@ -400,6 +400,11 @@
HTTPException = None
try:
+ from httplib import HTTPResponse
+except ImportError, msg:
+ HTTPResponse = None
+
+try:
# This is a convenient way to make keepalive optional.
# Just rename the module so it can't be imported.
import keepalive
@@ -1065,8 +1070,21 @@
req = urllib2.Request(self.url, self.opts.data) # build request object
self._add_headers(req) # add misc headers that we need
self._build_range(req) # take care of reget and byterange stuff
+
+ def _checkNoReget(fo):
+ # HTTP can disallow Range requests
+
+ if self.opts.reget is None:
+ return False
+ if 'Range' not in req.headers:
+ return False
+ if not isinstance(fo, HTTPResponse):
+ return False
+
+ return fo.code != 206
fo, hdr = self._make_request(req, opener)
+ fetch_again = 0
if self.reget_time and self.opts.reget == 'check_timestamp':
# do this if we have a local file with known timestamp AND
# we're in check_timestamp reget mode.
@@ -1078,14 +1096,26 @@
except (TypeError,):
fetch_again = 1
+ if _checkNoReget(fo): # doing a "reget" didn't work, so fixup
+ fetch_again = 0
+ self.opts.reget = None
+ self.append = 0
+ self._amount_read = 0
+
+ if True:
if fetch_again:
# the server version is newer than the (incomplete) local
# version, so we should abandon the version we're getting
# and fetch the whole thing again.
fo.close()
+ # Without this we'll have to read all of the previous request
+ # data. For "large" requests, this is very bad.
+ fo.close_connection()
self.opts.reget = None
del req.headers['Range']
self._build_range(req)
+ # This doesn't get reset in _build_range() *sigh*
+ self._amount_read = 0
fo, hdr = self._make_request(req, opener)
(scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)

@ -1,24 +0,0 @@
--- urlgrabber/grabber.py 22 Sep 2006 00:58:05 -0000 1.48
+++ urlgrabber/grabber.py 5 Dec 2006 23:48:51 -0000
@@ -1198,13 +1198,21 @@
"""dump the file to self.filename."""
if self.append: new_fo = open(self.filename, 'ab')
else: new_fo = open(self.filename, 'wb')
+ try:
+ # if we have a known range, only try to read that much.
+ (low, high) = self.opts.range
+ amount = high - low
+ except TypeError, ValueError:
+ amount = None
bs = 1024*8
size = 0
+ if amount is not None: bs = min(bs, amount - size)
block = self.read(bs)
size = size + len(block)
while block:
new_fo.write(block)
+ if amount is not None: bs = min(bs, amount - size)
block = self.read(bs)
size = size + len(block)

@ -1,25 +0,0 @@
diff -rup urlgrabber-3.0.0-orig/urlgrabber/keepalive.py urlgrabber-3.0.0/urlgrabber/keepalive.py
--- urlgrabber-3.0.0-orig/urlgrabber/keepalive.py 2006-07-20 16:15:58.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/keepalive.py 2008-07-10 17:37:06.000000000 -0400
@@ -328,12 +328,16 @@ class HTTPHandler(KeepAliveHandler, urll
def http_open(self, req):
return self.do_open(HTTPConnection, req)
-class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
- def __init__(self):
- KeepAliveHandler.__init__(self)
+# If SSL isn't available, don't make urlgrabber completely unusable
+try:
+ class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
+ def __init__(self):
+ KeepAliveHandler.__init__(self)
- def https_open(self, req):
- return self.do_open(HTTPSConnection, req)
+ def https_open(self, req):
+ return self.do_open(HTTPSConnection, req)
+except:
+ pass
class HTTPResponse(httplib.HTTPResponse):
# we need to subclass HTTPResponse in order to

@ -1,305 +0,0 @@
diff -rup urlgrabber-3.0.0-orig/urlgrabber/progress.py urlgrabber-3.0.0/urlgrabber/progress.py
--- urlgrabber-3.0.0-orig/urlgrabber/progress.py 2008-06-16 00:48:52.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/progress.py 2008-06-16 00:49:25.000000000 -0400
@@ -24,7 +24,74 @@ import time
import math
import thread
import types
-
+import fcntl
+import struct
+import termios
+
+# Code from http://mail.python.org/pipermail/python-list/2000-May/033365.html
+def terminal_width(fd=1):
+ """ Get the real terminal width """
+ try:
+ buf = 'abcdefgh'
+ buf = fcntl.ioctl(fd, termios.TIOCGWINSZ, buf)
+ return struct.unpack('hhhh', buf)[1]
+ except: # IOError
+ return 80
+
+_term_width_val = None
+_term_width_last = None
+def terminal_width_cached(fd=1, cache_timeout=1.000):
+ """ Get the real terminal width, but cache it for a bit. """
+ global _term_width_val
+ global _term_width_last
+
+ now = time.time()
+ if _term_width_val is None or (now - _term_width_last) > cache_timeout:
+ _term_width_val = terminal_width(fd)
+ _term_width_last = now
+ return _term_width_val
+
+class TerminalLine:
+ """ Help create dynamic progress bars, uses terminal_width_cached(). """
+
+ def __init__(self, min_rest=0, beg_len=None, fd=1, cache_timeout=1.000):
+ if beg_len is None:
+ beg_len = min_rest
+ self._min_len = min_rest
+ self._llen = terminal_width_cached(fd, cache_timeout)
+ if self._llen < beg_len:
+ self._llen = beg_len
+ self._fin = False
+
+ def __len__(self):
+ """ Usable length for elements. """
+ return self._llen - self._min_len
+
+ def rest_split(self, fixed, elements=2):
+ """ After a fixed length, split the rest of the line length among
+ a number of different elements (default=2). """
+ if self._llen < fixed:
+ return 0
+ return (self._llen - fixed) / elements
+
+ def add(self, element, full_len=None):
+ """ If there is room left in the line, above min_len, add element.
+ Note that as soon as one add fails all the rest will fail too. """
+
+ if full_len is None:
+ full_len = len(element)
+ if len(self) < full_len:
+ self._fin = True
+ if self._fin:
+ return ''
+
+ self._llen -= len(element)
+ return element
+
+ def rest(self):
+ """ Current rest of line, same as .rest_split(fixed=0, elements=1). """
+ return self._llen
+
class BaseMeter:
def __init__(self):
self.update_period = 0.3 # seconds
@@ -84,6 +151,64 @@ class BaseMeter:
def _do_end(self, amount_read, now=None):
pass
+# This is kind of a hack, but progress is gotten from grabber which doesn't
+# know about the total size to download. So we do this so we can get the data
+# out of band here. This will be "fixed" one way or anther soon.
+_text_meter_total_size = 0
+_text_meter_sofar_size = 0
+def text_meter_total_size(size, downloaded=0):
+ global _text_meter_total_size
+ global _text_meter_sofar_size
+ _text_meter_total_size = size
+ _text_meter_sofar_size = downloaded
+
+#
+# update: No size (minimal: 17 chars)
+# -----------------------------------
+# <text> <rate> | <current size> <elapsed time>
+# 8-48 1 8 3 6 1 9 5
+#
+# Order: 1. <text>+<current size> (17)
+# 2. +<elapsed time> (10, total: 27)
+# 3. + ( 5, total: 32)
+# 4. +<rate> ( 9, total: 41)
+#
+# update: Size, Single file
+# -------------------------
+# <text> <pc> <bar> <rate> | <current size> <eta time> ETA
+# 8-25 1 3-4 1 6-16 1 8 3 6 1 9 1 3 1
+#
+# Order: 1. <text>+<current size> (17)
+# 2. +<eta time> (10, total: 27)
+# 3. +ETA ( 5, total: 32)
+# 4. +<pc> ( 4, total: 36)
+# 5. +<rate> ( 9, total: 45)
+# 6. +<bar> ( 7, total: 52)
+#
+# update: Size, All files
+# -----------------------
+# <text> <total pc> <pc> <bar> <rate> | <current size> <eta time> ETA
+# 8-22 1 5-7 1 3-4 1 6-12 1 8 3 6 1 9 1 3 1
+#
+# Order: 1. <text>+<current size> (17)
+# 2. +<eta time> (10, total: 27)
+# 3. +ETA ( 5, total: 32)
+# 4. +<total pc> ( 5, total: 37)
+# 4. +<pc> ( 4, total: 41)
+# 5. +<rate> ( 9, total: 50)
+# 6. +<bar> ( 7, total: 57)
+#
+# end
+# ---
+# <text> | <current size> <elapsed time>
+# 8-56 3 6 1 9 5
+#
+# Order: 1. <text> ( 8)
+# 2. +<current size> ( 9, total: 17)
+# 3. +<elapsed time> (10, total: 27)
+# 4. + ( 5, total: 32)
+#
+
class TextMeter(BaseMeter):
def __init__(self, fo=sys.stderr):
BaseMeter.__init__(self)
@@ -98,38 +223,80 @@ class TextMeter(BaseMeter):
text = self.text
else:
text = self.basename
+
+ ave_dl = format_number(self.re.average_rate())
+ sofar_size = None
+ if _text_meter_total_size:
+ sofar_size = _text_meter_sofar_size + amount_read
+ sofar_pc = (sofar_size * 100) / _text_meter_total_size
+
+ # Include text + ui_rate in minimal
+ tl = TerminalLine(8, 8+1+8)
+ ui_size = tl.add(' | %5sB' % fread)
if self.size is None:
- out = '\r%-60.60s %5sB %s ' % \
- (text, fread, fetime)
+ ui_time = tl.add(' %9s' % fetime)
+ ui_end = tl.add(' ' * 5)
+ ui_rate = tl.add(' %5sB/s' % ave_dl)
+ out = '\r%-*.*s%s%s%s%s' % (tl.rest(), tl.rest(), text,
+ ui_rate, ui_size, ui_time, ui_end)
else:
rtime = self.re.remaining_time()
frtime = format_time(rtime)
frac = self.re.fraction_read()
- bar = '='*int(25 * frac)
- out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
- (text, frac*100, bar, fread, frtime)
+ ui_time = tl.add(' %9s' % frtime)
+ ui_end = tl.add(' ETA ')
+
+ if sofar_size is None:
+ ui_sofar_pc = ''
+ else:
+ ui_sofar_pc = tl.add(' (%i%%)' % sofar_pc,
+ full_len=len(" (100%)"))
+
+ ui_pc = tl.add(' %2i%%' % (frac*100))
+ ui_rate = tl.add(' %5sB/s' % ave_dl)
+ # Make text grow a bit before we start growing the bar too
+ blen = 4 + tl.rest_split(8 + 8 + 4)
+ bar = '='*int(blen * frac)
+ ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar))
+ out = '\r%-*.*s%s%s%s%s%s%s%s' % (tl.rest(), tl.rest(), text,
+ ui_sofar_pc, ui_pc, ui_bar,
+ ui_rate, ui_size, ui_time, ui_end)
self.fo.write(out)
self.fo.flush()
def _do_end(self, amount_read, now=None):
+ global _text_meter_total_size
+ global _text_meter_sofar_size
+
total_time = format_time(self.re.elapsed_time())
total_size = format_number(amount_read)
if self.text is not None:
text = self.text
else:
text = self.basename
- if self.size is None:
- out = '\r%-60.60s %5sB %s ' % \
- (text, total_size, total_time)
- else:
- bar = '='*25
- out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \
- (text, 100, bar, total_size, total_time)
- self.fo.write(out + '\n')
+
+ tl = TerminalLine(8)
+ ui_size = tl.add(' | %5sB' % total_size)
+ ui_time = tl.add(' %9s' % total_time)
+ ui_end = tl.add(' ' * 5)
+ out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text,
+ ui_size, ui_time, ui_end)
+ self.fo.write(out)
self.fo.flush()
+ # Don't add size to the sofar size until we have all of it.
+ # If we don't have a size, then just pretend/hope we got all of it.
+ if self.size is not None and amount_read != self.size:
+ return
+
+ if _text_meter_total_size:
+ _text_meter_sofar_size += amount_read
+ if _text_meter_total_size <= _text_meter_sofar_size:
+ _text_meter_total_size = 0
+ _text_meter_sofar_size = 0
+
text_progress_meter = TextMeter
class MultiFileHelper(BaseMeter):
@@ -397,10 +564,12 @@ class RateEstimator:
#print 'times', now, self.last_update_time
time_diff = now - self.last_update_time
read_diff = amount_read - self.last_amount_read
- self.last_update_time = now
+ # First update, on reget is the file size
+ if self.last_amount_read:
+ self.last_update_time = now
+ self.ave_rate = self._temporal_rolling_ave(\
+ time_diff, read_diff, self.ave_rate, self.timescale)
self.last_amount_read = amount_read
- self.ave_rate = self._temporal_rolling_ave(\
- time_diff, read_diff, self.ave_rate, self.timescale)
#print 'results', time_diff, read_diff, self.ave_rate
#####################################################################
@@ -529,3 +698,49 @@ def format_number(number, SI=0, space='
format = '%.0f%s%s'
return(format % (float(number or 0), space, symbols[depth]))
+
+def _tst(fn, cur, tot, beg, size, *args):
+ tm = TextMeter()
+ text = "(%d/%d): %s" % (cur, tot, fn)
+ tm.start(fn, "http://www.example.com/path/to/fn/" + fn, fn, size, text=text)
+ num = beg
+ off = 0
+ for (inc, delay) in args:
+ off += 1
+ while num < ((size * off) / len(args)):
+ num += inc
+ tm.update(num)
+ time.sleep(delay)
+ tm.end(size)
+
+if __name__ == "__main__":
+ # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28
+ # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08
+ if len(sys.argv) >= 2 and sys.argv[1] == 'total':
+ text_meter_total_size(1000 + 10000 + 10000 + 1000000 + 1000000 +
+ 1000000 + 10000 + 10000 + 10000 + 1000000)
+ _tst("sm-1.0.0-1.fc8.i386.rpm", 1, 10, 0, 1000,
+ (10, 0.2), (10, 0.1), (100, 0.25))
+ _tst("s-1.0.1-1.fc8.i386.rpm", 2, 10, 0, 10000,
+ (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25))
+ _tst("m-1.0.1-2.fc8.i386.rpm", 3, 10, 5000, 10000,
+ (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25))
+ _tst("large-file-name-Foo-11.8.7-4.5.6.1.fc8.x86_64.rpm", 4, 10, 0, 1000000,
+ (1000, 0.2), (1000, 0.1), (10000, 0.1))
+ _tst("large-file-name-Foo2-11.8.7-4.5.6.2.fc8.x86_64.rpm", 5, 10,
+ 500001, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1))
+ _tst("large-file-name-Foo3-11.8.7-4.5.6.3.fc8.x86_64.rpm", 6, 10,
+ 750002, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1))
+ _tst("large-file-name-Foo4-10.8.7-4.5.6.1.fc8.x86_64.rpm", 7, 10, 0, 10000,
+ (100, 0.1))
+ _tst("large-file-name-Foo5-10.8.7-4.5.6.2.fc8.x86_64.rpm", 8, 10,
+ 5001, 10000, (100, 0.1))
+ _tst("large-file-name-Foo6-10.8.7-4.5.6.3.fc8.x86_64.rpm", 9, 10,
+ 7502, 10000, (1, 0.1))
+ _tst("large-file-name-Foox-9.8.7-4.5.6.1.fc8.x86_64.rpm", 10, 10,
+ 0, 1000000, (10, 0.5),
+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
+ (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
+ (100000, 0.1), (1, 0.1))
Only in urlgrabber-3.0.0/urlgrabber: progress.py.orig

@ -1,60 +0,0 @@
diff -up urlgrabber-3.0.0/urlgrabber/progress.py.type urlgrabber-3.0.0/urlgrabber/progress.py
--- urlgrabber-3.0.0/urlgrabber/progress.py.type 2007-10-10 11:34:55.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/progress.py 2007-10-10 11:36:50.000000000 -0400
@@ -23,6 +23,7 @@ import sys
import time
import math
import thread
+import types
class BaseMeter:
def __init__(self):
@@ -343,7 +344,7 @@ class TextMultiFileMeter(MultiFileMeter)
try:
format = "%-30.30s %6.6s %s"
fn = meter.basename
- if type(message) in (type(''), type(u'')):
+ if type(message) in types.StringTypes:
message = message.splitlines()
if not message: message = ['']
out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
diff -up urlgrabber-3.0.0/urlgrabber/mirror.py.type urlgrabber-3.0.0/urlgrabber/mirror.py
--- urlgrabber-3.0.0/urlgrabber/mirror.py.type 2007-10-10 11:35:22.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/mirror.py 2007-10-10 11:36:14.000000000 -0400
@@ -90,6 +90,7 @@ CUSTOMIZATION
import random
import thread # needed for locking to make this threadsafe
+import types
from grabber import URLGrabError, CallbackObject, DEBUG
@@ -266,7 +267,7 @@ class MirrorGroup:
def _parse_mirrors(self, mirrors):
parsed_mirrors = []
for m in mirrors:
- if type(m) == type(''): m = {'mirror': m}
+ if type(m) in types.StringTypes: m = {'mirror': m}
parsed_mirrors.append(m)
return parsed_mirrors
diff -up urlgrabber-3.0.0/urlgrabber/grabber.py.type urlgrabber-3.0.0/urlgrabber/grabber.py
--- urlgrabber-3.0.0/urlgrabber/grabber.py.type 2007-10-10 11:34:50.000000000 -0400
+++ urlgrabber-3.0.0/urlgrabber/grabber.py 2007-10-10 11:35:51.000000000 -0400
@@ -372,6 +372,7 @@ import sys
import urlparse
import rfc822
import time
+import types
import string
import urllib
import urllib2
@@ -1128,7 +1129,7 @@ class URLGrabberFileObject:
self.append = 0
reget_length = 0
rt = None
- if have_range and self.opts.reget and type(self.filename) == type(''):
+ if have_range and self.opts.reget and type(self.filename) in types.StringTypes:
# we have reget turned on and we're dumping to a file
try:
s = os.stat(self.filename)
Loading…
Cancel
Save