From 2ed39c5e95a16fbc5d6c3fed7c7d7ff54d973e4c Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: May 12 2017 07:52:42 +0000 Subject: [PATCH 1/5] remove non-printable characters in fixEncoding Some real-world changelogs contain non-printable characters or invalid unicode ones. xmlrpc fails on such strings, so we sanitize changelog strings before passing them to the client. Related: https://pagure.io/koji/issue/349 --- diff --git a/hub/kojihub.py b/hub/kojihub.py index 8c344f1..e7ad99f 100644 --- a/hub/kojihub.py +++ b/hub/kojihub.py @@ -9376,7 +9376,8 @@ class RootExports(object): else: results.append({'date': cldate, 'date_ts': cltime, 'author': clname, 'text': cltext}) - return _applyQueryOpts(results, queryOpts) + results = _applyQueryOpts(results, queryOpts) + return koji.fixEncodingRecurse(results) def cancelBuild(self, buildID): """Cancel the build with the given buildID diff --git a/koji/__init__.py b/koji/__init__.py index 7bafee7..f3cc50f 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -2894,7 +2894,12 @@ def _taskLabel(taskInfo): else: return '%s (%s)' % (method, arch) -def fixEncoding(value, fallback='iso8859-15'): +NONPRINTABLE = ''.join([chr(x) for x in range(10) + range(11, 13) + range(14, 32) + [127]]) +def removeNonprintable(value): + # expects raw-encoded string, not unicode + return value.translate(None, NONPRINTABLE) + +def fixEncoding(value, fallback='iso8859-15', remove_nonprintable=True): """ Convert value to a 'str' object encoded as UTF-8. If value is not valid UTF-8 to begin with, assume it is @@ -2906,18 +2911,22 @@ def fixEncoding(value, fallback='iso8859-15'): if isinstance(value, unicode): # value is already unicode, so just convert it # to a utf8-encoded str - return value.encode('utf8') + s = value.encode('utf8') else: # value is a str, but may be encoded in utf8 or some # other non-ascii charset. Try to verify it's utf8, and if not, # decode it using the fallback encoding. 
try: - return value.decode('utf8').encode('utf8') + s = value.decode('utf8').encode('utf8') except UnicodeDecodeError: - return value.decode(fallback).encode('utf8') + s = value.decode(fallback).encode('utf8') + if remove_nonprintable: + return removeNonprintable(s) + else: + return s -def fixEncodingRecurse(value, fallback='iso8859-15'): +def fixEncodingRecurse(value, fallback='iso8859-15', remove_nonprintable=True): """Recursively fix string encoding in an object Similar behavior to fixEncoding, but recursive @@ -2934,15 +2943,22 @@ def fixEncodingRecurse(value, fallback='iso8859-15'): ret[k] = v return ret elif isinstance(value, unicode): - return value.encode('utf8') + if remove_nonprintable: + return removeNonprintable(value.encode('utf8')) + else: + return value.encode('utf8') elif isinstance(value, str): # value is a str, but may be encoded in utf8 or some # other non-ascii charset. Try to verify it's utf8, and if not, # decode it using the fallback encoding. try: - return value.decode('utf8').encode('utf8') - except UnicodeDecodeError, err: - return value.decode(fallback).encode('utf8') + s = value.decode('utf8').encode('utf8') + except UnicodeDecodeError: + s = value.decode(fallback).encode('utf8') + if remove_nonprintable: + return removeNonprintable(s) + else: + return s else: return value From 6a1ea7d1dcd297d47da7d81e88474c3b2ac11e47 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: May 12 2017 07:52:42 +0000 Subject: [PATCH 2/5] less restrictive filter --- diff --git a/koji/__init__.py b/koji/__init__.py index f3cc50f..d35db93 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -2894,10 +2894,11 @@ def _taskLabel(taskInfo): else: return '%s (%s)' % (method, arch) -NONPRINTABLE = ''.join([chr(x) for x in range(10) + range(11, 13) + range(14, 32) + [127]]) +CONTROL_CHARS = [chr(i) for i in range(32)] +NONPRINTABLE_CHARS = ''.join([c for c in CONTROL_CHARS if c not in '\r\n\t']) def removeNonprintable(value): # expects raw-encoded string, not unicode 
- return value.translate(None, NONPRINTABLE) + return value.translate(None, NONPRINTABLE_CHARS) def fixEncoding(value, fallback='iso8859-15', remove_nonprintable=True): """ From a7b4389b81cb66816d4d93e0a4af0685dae01a83 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: May 12 2017 07:52:42 +0000 Subject: [PATCH 3/5] don't remove nonprintable characters by default --- diff --git a/hub/kojihub.py b/hub/kojihub.py index e7ad99f..3258f03 100644 --- a/hub/kojihub.py +++ b/hub/kojihub.py @@ -9377,7 +9377,7 @@ class RootExports(object): results.append({'date': cldate, 'date_ts': cltime, 'author': clname, 'text': cltext}) results = _applyQueryOpts(results, queryOpts) - return koji.fixEncodingRecurse(results) + return koji.fixEncodingRecurse(results, remove_nonprintable=True) def cancelBuild(self, buildID): """Cancel the build with the given buildID diff --git a/koji/__init__.py b/koji/__init__.py index d35db93..fc6456f 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -2900,7 +2900,7 @@ def removeNonprintable(value): # expects raw-encoded string, not unicode return value.translate(None, NONPRINTABLE_CHARS) -def fixEncoding(value, fallback='iso8859-15', remove_nonprintable=True): +def fixEncoding(value, fallback='iso8859-15', remove_nonprintable=False): """ Convert value to a 'str' object encoded as UTF-8. 
If value is not valid UTF-8 to begin with, assume it is @@ -2927,7 +2927,7 @@ def fixEncoding(value, fallback='iso8859-15', remove_nonprintable=True): return s -def fixEncodingRecurse(value, fallback='iso8859-15', remove_nonprintable=True): +def fixEncodingRecurse(value, fallback='iso8859-15', remove_nonprintable=False): """Recursively fix string encoding in an object Similar behavior to fixEncoding, but recursive From 356c026367c4ac24de940cacb48eabc117b485bc Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: May 12 2017 07:52:42 +0000 Subject: [PATCH 4/5] propagate parameters recursively --- diff --git a/koji/__init__.py b/koji/__init__.py index fc6456f..84aceb0 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -2933,14 +2933,14 @@ def fixEncodingRecurse(value, fallback='iso8859-15', remove_nonprintable=False): Similar behavior to fixEncoding, but recursive """ if isinstance(value, tuple): - return tuple([fixEncodingRecurse(x) for x in value]) + return tuple([fixEncodingRecurse(x, fallback=fallback, remove_nonprintable=remove_nonprintable) for x in value]) elif isinstance(value, list): - return list([fixEncodingRecurse(x) for x in value]) + return list([fixEncodingRecurse(x, fallback=fallback, remove_nonprintable=remove_nonprintable) for x in value]) elif isinstance(value, dict): ret = {} for k in value: - v = fixEncodingRecurse(value[k]) - k = fixEncodingRecurse(k) + v = fixEncodingRecurse(value[k], fallback=fallback, remove_nonprintable=remove_nonprintable) + k = fixEncodingRecurse(k, fallback=fallback, remove_nonprintable=remove_nonprintable) ret[k] = v return ret elif isinstance(value, unicode): From 4d37db92502dd8098ebcb77ba28fc5fc71214f55 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: May 12 2017 07:58:03 +0000 Subject: [PATCH 5/5] remove non-printable characters from getRPMHeaders result --- diff --git a/hub/kojihub.py b/hub/kojihub.py index 3258f03..e3afce1 100644 --- a/hub/kojihub.py +++ b/hub/kojihub.py @@ -9885,7 +9885,7 @@ class 
RootExports(object): headers = koji.get_header_fields(rpm_path, headers) for key, value in headers.items(): if isinstance(value, basestring): - headers[key] = koji.fixEncoding(value) + headers[key] = koji.fixEncoding(value, remove_nonprintable=True) return headers queryRPMSigs = staticmethod(query_rpm_sigs)