Compare commits

..

No commits in common. 'c9' and 'c10-beta' have entirely different histories.
c9 ... c10-beta

2
.gitignore vendored

@ -1 +1 @@
SOURCES/lxml-4.6.5.tar.gz
SOURCES/lxml-5.2.1-no-isoschematron-rng.tar.gz

@ -1 +1 @@
04a3ed4d33a511b5796880461b0edb6f3b144547 SOURCES/lxml-4.6.5.tar.gz
f2f98b02943baf31850f083b175c95fc3b242602 SOURCES/lxml-5.2.1-no-isoschematron-rng.tar.gz

@ -0,0 +1,40 @@
From f5f64df808b35969794ba3ea8c19079276aa0cb0 Mon Sep 17 00:00:00 2001
From: Lumir Balhar <lbalhar@redhat.com>
Date: Thu, 4 Apr 2024 11:11:38 +0200
Subject: [PATCH] Skip test_feedparser_data if lxml_html_clean is not available
This is useful mostly for distributors shipping lxml without
lxml_html_clean.
---
src/lxml/html/tests/test_feedparser_data.py | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/lxml/html/tests/test_feedparser_data.py b/src/lxml/html/tests/test_feedparser_data.py
index 36fba1acb..264c0d4b5 100644
--- a/src/lxml/html/tests/test_feedparser_data.py
+++ b/src/lxml/html/tests/test_feedparser_data.py
@@ -9,7 +9,11 @@
from lxml.tests.common_imports import doctest
from lxml.doctestcompare import LHTMLOutputChecker
-from lxml.html.clean import clean, Cleaner
+try:
+ from lxml.html.clean import clean, Cleaner
+ html_clean_available = True
+except ImportError:
+ html_clean_available = False
feed_dirs = [
os.path.join(os.path.dirname(__file__), 'feedparser-data'),
@@ -80,6 +84,11 @@ def shortDescription(self):
def test_suite():
suite = unittest.TestSuite()
+
+ if not html_clean_available:
+ print("Skipping tests in feedparser_data - external lxml_html_clean package is not installed")
+ return suite
+
for dir in feed_dirs:
for fn in os.listdir(dir):
fn = os.path.join(dir, fn)

@ -1,104 +0,0 @@
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 5eb3416..88a031d 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
c_ns = c_node.nsDef
while c_ns is not NULL:
- prefix = funicodeOrNone(c_ns.prefix)
- if prefix not in nsmap:
- nsmap[prefix] = funicodeOrNone(c_ns.href)
+ if c_ns.prefix or c_ns.href:
+ prefix = funicodeOrNone(c_ns.prefix)
+ if prefix not in nsmap:
+ nsmap[prefix] = funicodeOrNone(c_ns.href)
c_ns = c_ns.next
c_node = c_node.parent
return nsmap
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
index a196e34..45acfc8 100644
--- a/src/lxml/includes/xmlparser.pxd
+++ b/src/lxml/includes/xmlparser.pxd
@@ -144,6 +144,7 @@ cdef extern from "libxml/parser.h":
void* userData
int* spaceTab
int spaceMax
+ int nsNr
bint html
bint progressive
int inSubset
diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
index 4c20506..3da7485 100644
--- a/src/lxml/iterparse.pxi
+++ b/src/lxml/iterparse.pxi
@@ -419,7 +419,7 @@ cdef int _countNsDefs(xmlNode* c_node):
count = 0
c_ns = c_node.nsDef
while c_ns is not NULL:
- count += 1
+ count += (c_ns.href is not NULL)
c_ns = c_ns.next
return count
@@ -430,9 +430,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
count = 0
c_ns = c_node.nsDef
while c_ns is not NULL:
- ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
- funicode(c_ns.href))
- event_list.append( (u"start-ns", ns_tuple) )
- count += 1
+ if c_ns.href:
+ ns_tuple = (funicodeOrEmpty(c_ns.prefix),
+ funicode(c_ns.href))
+ event_list.append( (u"start-ns", ns_tuple) )
+ count += 1
c_ns = c_ns.next
return count
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index 3ed223b..f5ff6b2 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -569,6 +569,9 @@ cdef class _ParserContext(_ResolverContext):
self._c_ctxt.disableSAX = 0 # work around bug in libxml2
else:
xmlparser.xmlClearParserCtxt(self._c_ctxt)
+ # work around bug in libxml2 [2.9.10 .. 2.9.14]:
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
+ self._c_ctxt.nsNr = 0
cdef int prepare(self, bint set_document_loader=True) except -1:
cdef int result
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 42613dc..db1f560 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1459,6 +1459,27 @@ class ETreeOnlyTestCase(HelperTestCase):
[1,2,1,4],
counts)
+ def test_walk_after_parse_failure(self):
+ # This used to be an issue because libxml2 can leak empty namespaces
+ # between failed parser runs. iterwalk() failed to handle such a tree.
+ parser = etree.XMLParser()
+
+ try:
+ etree.XML('''<anot xmlns="1">''', parser=parser)
+ except etree.XMLSyntaxError:
+ pass
+ else:
+ assert False, "invalid input did not fail to parse"
+
+ et = etree.XML('''<root> </root>''', parser=parser)
+ try:
+ ns = next(etree.iterwalk(et, events=('start-ns',)))
+ except StopIteration:
+ # This would be the expected result, because there was no namespace
+ pass
+ else:
+ assert False, "Found unexpected namespace '%s'" % ns
+
def test_itertext_comment_pi(self):
# https://bugs.launchpad.net/lxml/+bug/1844674
XML = self.etree.XML

@ -0,0 +1,28 @@
#! /bin/bash -ex
# Download a release of lxml (if missing) and remove the isoschematron
# RNG resources directory from it.
#
# Usage: $0 VERSION
#
# Result: lxml-VERSION-no-isoschematron-rng.tar.gz in the current directory.
# The downloaded lxml-VERSION.tar.gz is left untouched so that a re-run does
# not need to download it again; the intermediate lxml-VERSION.tar is
# overwritten on every run.

version=$1
if [ -z "${version}" ]; then
    echo "Usage: $0 VERSION" >&2
    echo "" >&2
    echo "example: $0 4.9.2" >&2
    exit 1
fi

versionedname=lxml-${version}
orig_archive=${versionedname}.tar.gz
# Strip only the trailing ".gz" (not every occurrence of it in the name).
orig_tar=${orig_archive%.gz}
new_archive=${versionedname}-no-isoschematron-rng.tar.gz

if [ ! -e "${orig_archive}" ]; then
    wget -N "https://files.pythonhosted.org/packages/source/l/lxml/${orig_archive}"
fi

deleted_directory=lxml-${version}/src/lxml/isoschematron/resources/rng

# tar --delete does not operate on compressed archives, so do
# gz decompression explicitly.
# Decompress to stdout instead of in place: an in-place "gzip --decompress"
# removes the .tar.gz (defeating the "if missing" check above) and refuses to
# overwrite a .tar left behind by a previous run, which aborts under "-e".
gzip --decompress --stdout "${orig_archive}" > "${orig_tar}"
tar -v --delete -f "${orig_tar}" "${deleted_directory}"
gzip -cf "${orig_tar}" > "${new_archive}"

@ -1,110 +1,226 @@
%global modname lxml
Name: python-%{modname}
Version: 4.6.5
Release: 3%{?dist}
## START: Set by rpmautospec
## (rpmautospec version 0.6.1)
## RPMAUTOSPEC: autorelease, autochangelog
%define autorelease(e:s:pb:n) %{?-p:0.}%{lua:
release_number = 2;
base_release_number = tonumber(rpm.expand("%{?-b*}%{!?-b:1}"));
print(release_number + base_release_number - 1);
}%{?-e:.%{-e*}}%{?-s:.%{-s*}}%{!?-n:%{?dist}}
## END: Set by rpmautospec
Name: python-lxml
Version: 5.2.1
Release: %autorelease
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
# The lxml project is licensed under BSD
# The lxml project is licensed under BSD-3-Clause
# Some code is derived from ElementTree and cElementTree
# thus using the MIT-like elementtree license
# .xsl schematron files are under the MIT and zlib license
License: BSD and MIT and zlib
# thus using the MIT-CMU elementtree license
# .xsl schematron files are under the MIT license
License: BSD-3-Clause AND MIT-CMU AND MIT
URL: https://github.com/lxml/lxml
Source0: %{pypi_source %{modname}}
# Fix for CVE-2022-2309
# Resolved upstream:
# https://github.com/lxml/lxml/commit/86368e9cf70a0ad23cccd5ee32de847149af0c6f
# https://github.com/lxml/lxml/commit/d01872ccdf7e1e5e825b6c6292b43e7d27ae5fc4
# https://github.com/lxml/lxml/commit/c742576c105f40fc8b754fcae56fee4aa35840a3
# Tracking bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2107571
Patch0: CVE-2022-2309.patch
# We use the get-lxml-source.sh script to generate the tarball
# without the isoschematron RNG validation file under a problematic license.
# See: https://gitlab.com/fedora/legal/fedora-license-data/-/issues/154
Source0: lxml-%{version}-no-isoschematron-rng.tar.gz
Source1: get-lxml-source.sh
# Skip some tests if lxml_html_clean is not available
Patch: https://github.com/lxml/lxml/pull/417.patch
BuildRequires: gcc
BuildRequires: libxml2-devel
BuildRequires: libxslt-devel
BuildRequires: python3-devel
# Some of the extras create a build dependency loop.
# - [cssselect] Requires cssselect BuildRequires lxml
# - [html5] Requires html5lib BuildRequires lxml
# - [htmlsoup] Requires beautifulsoup4 Requires lxml
# - [html_clean] Requires lxml-html-clean Requires lxml
# Hence we provide a bcond to disable the extras altogether.
# By default, the extras are disabled in RHEL, to avoid dependencies.
%bcond extras %{undefined rhel}
%global _description \
lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries. It\
provides safe and convenient access to these libraries using the ElementTree API. It\
extends the ElementTree API significantly to offer support for XPath, RelaxNG,\
XML Schema, XSLT, C14N and much more.To contact the project, go to the project\
home page < or see our bug tracker at case you want to use the current ...
XML Schema, XSLT, C14N and much more.
%description %{_description}
%package -n python3-%{modname}
%package -n python3-lxml
Summary: %{summary}
BuildRequires: python3-devel
BuildRequires: python3-setuptools
BuildRequires: python3-Cython
Suggests: python%{python3_version}dist(cssselect) >= 0.7
Suggests: python%{python3_version}dist(html5lib)
Suggests: python%{python3_version}dist(beautifulsoup4)
%{?python_provide:%python_provide python3-%{modname}}
%if %{with extras}
Suggests: python3-lxml+cssselect
Suggests: python3-lxml+html5
Suggests: python3-lxml+htmlsoup
Suggests: python3-lxml+html_clean
%endif
%description -n python3-%{modname} %{_description}
%description -n python3-lxml %{_description}
Python 3 version.
%if %{with extras}
%pyproject_extras_subpkg -n python3-lxml cssselect html5 htmlsoup html_clean
%endif
%prep
%autosetup -n %{modname}-%{version} -p1
# Remove pregenerated Cython C sources
find -type f -name '*.c' -print -delete
%autosetup -n lxml-%{version} -p1
# Don't run html5lib tests --without extras.
# %%bcond only ever defines with_extras (when the extras are enabled); a
# without_extras macro is never set, so testing it would make the rm run
# unconditionally. Test with_extras instead.
%{!?with_extras:rm src/lxml/html/tests/test_html5parser.py}
# Remove limit for version of Cython
sed -i "s/Cython.*/Cython/" requirements.txt
sed -i 's/"Cython.*",/"Cython",/' pyproject.toml
%generate_buildrequires
%pyproject_buildrequires -x source%{?with_extras:,cssselect,html5,htmlsoup,html_clean}
%build
env WITH_CYTHON=true %py3_build
# Remove pregenerated Cython C sources
# We need to do this after %%pyproject_buildrequires because setup.py errors
# without Cython and without the .c files.
find -type f -name '*.c' -print -delete >&2
export WITH_CYTHON=true
%pyproject_wheel
%install
%py3_install
%pyproject_install
%pyproject_save_files lxml
%check
# The tests assume inplace build, so we copy the built library to source-dir.
# If not done that, Python can either import the tests or the extension modules, but not both.
cp -a build/lib.%{python3_platform}-%{python3_version}/* src/
cp -a build/lib.%{python3_platform}-*/* src/
# The options are: verbose, unit, functional
%{python3} test.py -vuf
%files -n python3-%{modname}
%license LICENSES.txt doc/licenses/BSD.txt doc/licenses/elementtree.txt
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
%{python3_sitearch}/%{modname}/
%{python3_sitearch}/%{modname}-*.egg-info/
%files -n python3-lxml -f %{pyproject_files}
%license doc/licenses/BSD.txt doc/licenses/elementtree.txt
%doc README.rst
%changelog
* Wed Jul 27 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.6.5-3
- Security fix for CVE-2022-2309
- Resolves: rhbz#2107571
## START: Generated by rpmautospec
* Mon Jun 24 2024 Troy Dawson <tdawson@redhat.com> - 5.2.1-2
- Bump release for June 2024 mass rebuild
* Thu Apr 04 2024 Lumir Balhar <lbalhar@redhat.com> - 5.2.1-1
- Update to 5.2.1 (rhbz#2272165)
* Wed Mar 06 2024 Miro Hrončok <miro@hroncok.cz> - 5.1.0-7
- Revert "Reduce the type safety as a workaround for build failures in
Fedora 40+"
- The workaround has been moved to Cython
* Mon Mar 04 2024 Miro Hrončok <miro@hroncok.cz> - 5.1.0-6
- Fix test_elementtree with Expat 2.6.0
- Fixes: rhbz#2264859
* Wed Feb 07 2024 Miro Hrončok <miro@hroncok.cz> - 5.1.0-5
- Reduce the type safety as a workaround for build failures in Fedora 40+
- Fixes: rhbz#2261034
- Fixes: rhbz#2256232
* Mon Jan 22 2024 Fedora Release Engineering <releng@fedoraproject.org> - 5.1.0-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild
* Mon Jan 08 2024 Lumír Balhar <lbalhar@redhat.com> - 5.1.0-1
- Update to 5.1.0 (rhbz#2256232)
* Wed Dec 20 2023 Lumír Balhar <lbalhar@redhat.com> - 4.9.4-1
- Update to 4.9.4 (rhbz#2255267)
* Sun Nov 26 2023 David King <amigadave@amigadave.com> - 4.9.3-4
- Fix building against libxml2 2.12.0
- Resolves: rhbz#2250838
* Mon Oct 30 2023 Miro Hrončok <mhroncok@redhat.com> - 4.9.3-3
- Fix build with a future mock version
* Tue Feb 08 2022 Tomas Orsava <torsava@redhat.com> - 4.6.5-2
- Add automatically generated Obsoletes tag with the python39- prefix
for smoother upgrade from RHEL8
- Related: rhbz#1990421
* Fri Jul 28 2023 Miro Hrončok <mhroncok@redhat.com> - 4.9.3-2
- Fix build with Cython 3
* Thu Jan 06 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.6.5-1
- Update to 4.6.5
* Fri Jul 21 2023 Lumír Balhar <lbalhar@redhat.com> - 4.9.3-1
- Update to 4.9.3 (rhbz#2219811)
* Fri Jul 21 2023 Fedora Release Engineering <releng@fedoraproject.org> - 4.9.2-9
- Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild
* Fri Jul 14 2023 Miro Hrončok <mhroncok@redhat.com> - 4.9.2-8
- Bring back the isoschematron submodule,
but without the validation of the schema file itself
* Fri Jun 16 2023 Python Maint <python-maint@redhat.com> - 4.9.2-7
- Rebuilt for Python 3.12
* Tue Jun 13 2023 Python Maint <python-maint@redhat.com> - 4.9.2-6
- Bootstrap for Python 3.12
* Wed May 31 2023 Miro Hrončok <mhroncok@redhat.com> - 4.9.2-5
- Remove the isoschematron submodule
* Tue May 30 2023 Yaakov Selkowitz <yselkowi@redhat.com> - 4.9.2-4
- Disable extra subpackages in RHEL builds
* Mon May 29 2023 Tomáš Hrnčiar <thrnciar@redhat.com> - 4.9.2-3
- Skip failing test to avoid FTBFS
* Fri Jan 20 2023 Fedora Release Engineering <releng@fedoraproject.org> - 4.9.2-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_38_Mass_Rebuild
* Wed Dec 14 2022 Lumír Balhar <lbalhar@redhat.com> - 4.9.2-1
- Update to 4.9.2 (rhbz#2153063)
* Wed Sep 14 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.9.1-1
- Update to 4.9.1
- Fix for CVE-2022-2309
- Resolves: rhbz#2107571, rhbz#2110131
* Wed Aug 31 2022 Miro Hrončok <mhroncok@redhat.com> - 4.7.1-6
- Use SPDX license identifiers
- The schematron files are not Zlib licensed, but MIT
- Package the lxml[cssselect], lxml[html5] and lxml[htmlsoup] extras
* Fri Jul 22 2022 Fedora Release Engineering <releng@fedoraproject.org> - 4.7.1-5
- Rebuilt for https://fedoraproject.org/wiki/Fedora_37_Mass_Rebuild
* Wed Jun 22 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.7.1-4
- Fix FTBFS with setuptools >= 62.1
- Resolves: rhbz#2097102
* Mon Jun 13 2022 Python Maint <python-maint@redhat.com> - 4.7.1-3
- Rebuilt for Python 3.11
* Fri Jan 21 2022 Fedora Release Engineering <releng@fedoraproject.org> - 4.7.1-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild
* Thu Jan 06 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.7.1-1
- Update to 4.7.1
- Fixes CVE-2021-43818
- Resolves: rhbz#2032569
- Resolves: rhbz#2031686, rhbz#2032572
* Fri Nov 26 2021 Miro Hrončok <mhroncok@redhat.com> - 4.6.3-5
- Run the tests during build
- Resolves: rhbz#2026941
* Tue Aug 10 2021 Mohan Boddu <mboddu@redhat.com> - 4.6.3-3
- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
Related: rhbz#1991688
* Fri Jul 23 2021 Fedora Release Engineering <releng@fedoraproject.org> - 4.6.3-4
- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild
* Thu Jun 03 2021 Charalampos Stratakis <cstratak@redhat.com> - 4.6.3-2
* Thu Jun 03 2021 Charalampos Stratakis <cstratak@redhat.com> - 4.6.3-3
- Update the license information
* Wed Jun 02 2021 Python Maint <python-maint@redhat.com> - 4.6.3-2
- Rebuilt for Python 3.10
* Thu May 20 2021 Charalampos Stratakis <cstratak@redhat.com> - 4.6.3-1
- Update to 4.6.3
- Fixes CVE-2021-28957
Resolves: rhbz#1941534
* Fri Apr 16 2021 Mohan Boddu <mboddu@redhat.com> - 4.6.2-3
- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937
- Fixes: rhbz#1941773
- Fixes: rhbz#1941535
* Wed Jan 27 2021 Fedora Release Engineering <releng@fedoraproject.org> - 4.6.2-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild
@ -1651,3 +1767,5 @@ Resolves: rhbz#1941534
* Tue Dec 13 2005 Shahms E. King <shahms@shahms.com> 0.8-1
- Initial package
## END: Generated by rpmautospec

Loading…
Cancel
Save