From f2073d2a174381675efaa4f22a7c463a3e1fbc3d Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Fri, 12 Jul 2024 03:01:32 +0300 Subject: [PATCH] import ruby-2.5.9-112.module+el8.10.0+22021+135c76a8 --- ...ms-incomplete-fix-for-CVE-2023-28755.patch | 44 + ...24-27280-Buffer-overread-in-StringIO.patch | 81 + ...-RCE-vulnerability-with-rdoc_options.patch | 203 ++ ...27282-Memory-address-read-with-Regex.patch | 48 + ...uby-net-http-Renew-test-certificates.patch | 236 +++ ....2.9-Fix-CVE-2024-35176-DoS-in-REXML.patch | 1662 +++++++++++++++++ ...can-1.0.2-Accept-String-as-a-pattern.patch | 264 +++ SPECS/ruby.spec | 74 +- 8 files changed, 2611 insertions(+), 1 deletion(-) create mode 100644 SOURCES/ruby-3.0.7-Fix-CVE-2023-36617-Upstreams-incomplete-fix-for-CVE-2023-28755.patch create mode 100644 SOURCES/ruby-3.0.7-Fix-CVE-2024-27280-Buffer-overread-in-StringIO.patch create mode 100644 SOURCES/ruby-3.0.7-Fix-CVE-2024-27281-RCE-vulnerability-with-rdoc_options.patch create mode 100644 SOURCES/ruby-3.0.7-Fix-CVE-2024-27282-Memory-address-read-with-Regex.patch create mode 100644 SOURCES/ruby-3.4.0-ruby-net-http-Renew-test-certificates.patch create mode 100644 SOURCES/rubygem-rexml-3.2.9-Fix-CVE-2024-35176-DoS-in-REXML.patch create mode 100644 SOURCES/rubygem-strscan-1.0.2-Accept-String-as-a-pattern.patch diff --git a/SOURCES/ruby-3.0.7-Fix-CVE-2023-36617-Upstreams-incomplete-fix-for-CVE-2023-28755.patch b/SOURCES/ruby-3.0.7-Fix-CVE-2023-36617-Upstreams-incomplete-fix-for-CVE-2023-28755.patch new file mode 100644 index 0000000..090d64c --- /dev/null +++ b/SOURCES/ruby-3.0.7-Fix-CVE-2023-36617-Upstreams-incomplete-fix-for-CVE-2023-28755.patch @@ -0,0 +1,44 @@ +From 5d08bbb0415c2ecc10037837b81e6a27d40ee7be Mon Sep 17 00:00:00 2001 +From: Hiroshi SHIBATA +Date: Thu, 29 Jun 2023 22:25:17 +0900 +Subject: [PATCH] CVE-2023-36617 for Ruby 3.0 (#7997) + +* Merge URI-0.10.3 + +--- + +Backport note, bundler is not distributed the same as RubyGems in Ruby +2.5, therefore we do not use backport for Bundler. +--- + lib/uri/rfc2396_parser.rb | 4 ++-- + lib/uri/rfc3986_parser.rb | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/uri/rfc2396_parser.rb b/lib/uri/rfc2396_parser.rb +index b9e7b2b26e..c7c3ecd96d 100644 +--- a/lib/uri/rfc2396_parser.rb ++++ b/lib/uri/rfc2396_parser.rb +@@ -502,8 +502,8 @@ def initialize_regexp(pattern) + ret = {} + + # for URI::split +- ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED) +- ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED) ++ ret[:ABS_URI] = Regexp.new('\A\s*+' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED) ++ ret[:REL_URI] = Regexp.new('\A\s*+' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED) + + # for URI::extract + ret[:URI_REF] = Regexp.new(pattern[:URI_REF]) +diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb +index ad32368cfa..1accd03376 100644 +--- a/lib/uri/rfc3986_parser.rb ++++ b/lib/uri/rfc3986_parser.rb +@@ -106,7 +106,7 @@ def default_regexp # :nodoc: + QUERY: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/, + FRAGMENT: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/, + OPAQUE: /\A(?:[^\/].*)?\z/, +- PORT: /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/, ++ PORT: /\A[\x09\x0a\x0c\x0d ]*+\d*[\x09\x0a\x0c\x0d ]*\z/, + } + end + diff --git a/SOURCES/ruby-3.0.7-Fix-CVE-2024-27280-Buffer-overread-in-StringIO.patch b/SOURCES/ruby-3.0.7-Fix-CVE-2024-27280-Buffer-overread-in-StringIO.patch new file mode 100644 index 0000000..2ad07bd --- /dev/null +++ b/SOURCES/ruby-3.0.7-Fix-CVE-2024-27280-Buffer-overread-in-StringIO.patch @@ -0,0 +1,81 @@ +From 740289bf02c9bea54f75b702f62862c62c62672b Mon Sep 17 00:00:00 2001 +From: Hiroshi SHIBATA +Date: Thu, 21 Mar 2024 15:55:48 +0900 +Subject: [PATCH] Merge StringIO 3.0.1.1 + +--- + ext/stringio/stringio.c | 2 +- + test/stringio/test_stringio.rb | 27 ++++++++++++++++++++++----- + 2 files changed, 23 insertions(+), 6 deletions(-) + +diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c +index f537054b5d..946ae06da4 100644 +--- a/ext/stringio/stringio.c ++++ b/ext/stringio/stringio.c +@@ -833,7 +833,7 @@ strio_unget_bytes(struct StringIO *ptr, const char *cp, long cl) + len = RSTRING_LEN(str); + rest = pos - len; + if (cl > pos) { +- long ex = (rest < 0 ? cl-pos : cl+rest); ++ long ex = cl - (rest < 0 ? pos : len); + rb_str_modify_expand(str, ex); + rb_str_set_len(str, len + ex); + s = RSTRING_PTR(str); +diff --git a/test/stringio/test_stringio.rb b/test/stringio/test_stringio.rb +index f5169f641a..c055b901e3 100644 +--- a/test/stringio/test_stringio.rb ++++ b/test/stringio/test_stringio.rb +@@ -693,6 +693,15 @@ def test_ungetc_padding + assert_equal("b""\0""a", s.string) + end + ++ def test_ungetc_fill ++ count = 100 ++ s = StringIO.new ++ s.print 'a' * count ++ s.ungetc('b' * (count * 5)) ++ assert_equal((count * 5), s.string.size) ++ assert_match(/\Ab+\z/, s.string) ++ end ++ + def test_ungetbyte_pos + b = '\\b00010001 \\B00010001 \\b1 \\B1 \\b000100011' + s = StringIO.new( b ) +@@ -718,6 +727,15 @@ def test_ungetbyte_padding + assert_equal("b""\0""a", s.string) + end + ++ def test_ungetbyte_fill ++ count = 100 ++ s = StringIO.new ++ s.print 'a' * count ++ s.ungetbyte('b' * (count * 5)) ++ assert_equal((count * 5), s.string.size) ++ assert_match(/\Ab+\z/, s.string) ++ end ++ + def test_frozen + s = StringIO.new + s.freeze +@@ -760,18 +778,17 @@ def test_new_block_warning + end + + def test_overflow +- skip if RbConfig::SIZEOF["void*"] > RbConfig::SIZEOF["long"] ++ return if RbConfig::SIZEOF["void*"] > RbConfig::SIZEOF["long"] + limit = (1 << (RbConfig::SIZEOF["void*"]*8-1)) - 0x10 + assert_separately(%w[-rstringio], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + limit = #{limit} + ary = [] +- while true ++ begin + x = "a"*0x100000 + break if [x].pack("p").unpack("i!")[0] < 0 + ary << x +- skip if ary.size > 100 +- end ++ end while ary.size <= 100 + s = StringIO.new(x) + s.gets("xxx", limit) + assert_equal(0x100000, s.pos) diff --git a/SOURCES/ruby-3.0.7-Fix-CVE-2024-27281-RCE-vulnerability-with-rdoc_options.patch b/SOURCES/ruby-3.0.7-Fix-CVE-2024-27281-RCE-vulnerability-with-rdoc_options.patch new file mode 100644 index 0000000..9816d13 --- /dev/null +++ b/SOURCES/ruby-3.0.7-Fix-CVE-2024-27281-RCE-vulnerability-with-rdoc_options.patch @@ -0,0 +1,203 @@ +From 7957a25edf844c966de45848fa7e9e2513955660 Mon Sep 17 00:00:00 2001 +From: Hiroshi SHIBATA +Date: Thu, 21 Mar 2024 15:47:40 +0900 +Subject: [PATCH 1/2] Merge RDoc-6.3.4.1 + +--- + lib/rdoc/rdoc.rb | 3 ++- + lib/rdoc/store.rb | 45 ++++++++++++++++++++-------------- + test/rdoc/test_rdoc_options.rb | 6 ++--- + 3 files changed, 31 insertions(+), 23 deletions(-) + +diff --git a/lib/rdoc/rdoc.rb b/lib/rdoc/rdoc.rb +index a2711fbbd1..c5690fc3b4 100644 +--- a/lib/rdoc/rdoc.rb ++++ b/lib/rdoc/rdoc.rb +@@ -162,8 +162,9 @@ def load_options + RDoc.load_yaml + + begin +- options = YAML.load_file '.rdoc_options' ++ options = YAML.safe_load_file '.rdoc_options', permitted_classes: [RDoc::Options, Symbol] + rescue Psych::SyntaxError ++ raise RDoc::Error, "#{options_file} is not a valid rdoc options file" + end + + raise RDoc::Error, "#{options_file} is not a valid rdoc options file" unless +diff --git a/lib/rdoc/store.rb b/lib/rdoc/store.rb +index 999aa76f92..07d03e90f7 100644 +--- a/lib/rdoc/store.rb ++++ b/lib/rdoc/store.rb +@@ -539,9 +539,7 @@ def load_all + def load_cache + #orig_enc = @encoding + +- open cache_path, 'rb' do |io| +- @cache = Marshal.load io.read +- end ++ @cache = marshal_load(cache_path) + + load_enc = @cache[:encoding] + +@@ -596,9 +594,7 @@ def load_class klass_name + def load_class_data klass_name + file = class_file klass_name + +- open file, 'rb' do |io| +- Marshal.load io.read +- end ++ marshal_load(file) + rescue Errno::ENOENT => e + error = MissingFileError.new(self, file, klass_name) + error.set_backtrace e.backtrace +@@ -611,14 +607,10 @@ def load_class_data klass_name + def load_method klass_name, method_name + file = method_file klass_name, method_name + +- open file, 'rb' do |io| +- obj = Marshal.load io.read +- obj.store = self +- obj.parent = +- find_class_or_module(klass_name) || load_class(klass_name) unless +- obj.parent +- obj +- end ++ obj = marshal_load(file) ++ obj.store = self ++ obj.parent ||= find_class_or_module(klass_name) || load_class(klass_name) ++ obj + rescue Errno::ENOENT => e + error = MissingFileError.new(self, file, klass_name + method_name) + error.set_backtrace e.backtrace +@@ -631,11 +623,9 @@ def load_method klass_name, method_name + def load_page page_name + file = page_file page_name + +- open file, 'rb' do |io| +- obj = Marshal.load io.read +- obj.store = self +- obj +- end ++ obj = marshal_load(file) ++ obj.store = self ++ obj + rescue Errno::ENOENT => e + error = MissingFileError.new(self, file, page_name) + error.set_backtrace e.backtrace +@@ -965,4 +955,21 @@ def unique_modules + @unique_modules + end + ++ private ++ def marshal_load(file) ++ File.open(file, 'rb') {|io| Marshal.load(io, MarshalFilter)} ++ end ++ ++ MarshalFilter = proc do |obj| ++ case obj ++ when true, false, nil, Array, Class, Encoding, Hash, Integer, String, Symbol, RDoc::Text ++ else ++ unless obj.class.name.start_with?("RDoc::") ++ raise TypeError, "not permitted class: #{obj.class.name}" ++ end ++ end ++ obj ++ end ++ private_constant :MarshalFilter ++ + end +diff --git a/test/rdoc/test_rdoc_options.rb b/test/rdoc/test_rdoc_options.rb +index 400ed9a549..247c7c87ce 100644 +--- a/test/rdoc/test_rdoc_options.rb ++++ b/test/rdoc/test_rdoc_options.rb +@@ -145,7 +145,7 @@ def test_init_with_encoding + + @options.encoding = Encoding::IBM437 + +- options = YAML.load YAML.dump @options ++ options = YAML.safe_load(YAML.dump(@options), permitted_classes: [RDoc::Options, Symbol]) + + assert_equal Encoding::IBM437, options.encoding + end +@@ -161,7 +161,7 @@ def test_init_with_trim_paths + - /etc + YAML + +- options = YAML.load yaml ++ options = YAML.safe_load(yaml, permitted_classes: [RDoc::Options, Symbol]) + + assert_empty options.rdoc_include + assert_empty options.static_path +@@ -729,7 +729,7 @@ def test_write_options + + assert File.exist? '.rdoc_options' + +- assert_equal @options, YAML.load(File.read('.rdoc_options')) ++ assert_equal @options, YAML.safe_load(File.read('.rdoc_options'), permitted_classes: [RDoc::Options, Symbol]) + end + end + + +From 153a4d16058783c923d0df5b1cbe2610ef96e3a8 Mon Sep 17 00:00:00 2001 +From: Jarek Prokop +Date: Tue, 28 May 2024 16:56:26 +0200 +Subject: [PATCH 2/2] Port the rebase to work with Ruby 2.5.9. + +Ruby 2.5's Psych does not have safe_load_file method. +However, from Ruby 3.3's sources, the method is just File.read +simple wrapper with a safe_load call. Therefore it was copied over to +the lib/rdoc/rdoc.rb file. +--- + lib/rdoc/rdoc.rb | 9 ++++++- + test/rdoc/test_rdoc_options.rb | 6 +++--- + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/lib/rdoc/rdoc.rb b/lib/rdoc/rdoc.rb +index c5690fc3b4..435cd2eaf0 100644 +--- a/lib/rdoc/rdoc.rb ++++ b/lib/rdoc/rdoc.rb +@@ -162,7 +162,12 @@ def load_options + RDoc.load_yaml + + begin +- options = YAML.safe_load_file '.rdoc_options', permitted_classes: [RDoc::Options, Symbol] ++ # Opening file inspired from Ruby 3.3.0 sources, ++ # file 'ext/psych/lib/psych.rb', line 658. ++ # https://github.com/ruby/ruby/blob/v3_3_0/ext/psych/lib/psych.rb#L658 ++ options = File.open('.rdoc_options', 'r:bom|utf-8') do |file| ++ YAML.safe_load file, [RDoc::Options, Symbol], [], false, '.rdoc_options' ++ end + rescue Psych::SyntaxError + raise RDoc::Error, "#{options_file} is not a valid rdoc options file" + end +diff --git a/test/rdoc/test_rdoc_options.rb b/test/rdoc/test_rdoc_options.rb +index 247c7c87ce..60fe035dce 100644 +--- a/test/rdoc/test_rdoc_options.rb ++++ b/test/rdoc/test_rdoc_options.rb +@@ -145,7 +145,7 @@ def test_init_with_encoding + + @options.encoding = Encoding::IBM437 + +- options = YAML.safe_load(YAML.dump(@options), permitted_classes: [RDoc::Options, Symbol]) ++ options = YAML.safe_load(YAML.dump(@options), [RDoc::Options, Symbol]) + + assert_equal Encoding::IBM437, options.encoding + end +@@ -161,7 +161,7 @@ def test_init_with_trim_paths + - /etc + YAML + +- options = YAML.safe_load(yaml, permitted_classes: [RDoc::Options, Symbol]) ++ options = YAML.safe_load(yaml, [RDoc::Options, Symbol]) + + assert_empty options.rdoc_include + assert_empty options.static_path +@@ -729,7 +729,7 @@ def test_write_options + + assert File.exist? '.rdoc_options' + +- assert_equal @options, YAML.safe_load(File.read('.rdoc_options'), permitted_classes: [RDoc::Options, Symbol]) ++ assert_equal @options, YAML.safe_load(File.read('.rdoc_options'), [RDoc::Options, Symbol]) + end + end + diff --git a/SOURCES/ruby-3.0.7-Fix-CVE-2024-27282-Memory-address-read-with-Regex.patch b/SOURCES/ruby-3.0.7-Fix-CVE-2024-27282-Memory-address-read-with-Regex.patch new file mode 100644 index 0000000..48d91e2 --- /dev/null +++ b/SOURCES/ruby-3.0.7-Fix-CVE-2024-27282-Memory-address-read-with-Regex.patch @@ -0,0 +1,48 @@ +From 9eda3000e3efd5bdd4ed60d07e2f43633e39d361 Mon Sep 17 00:00:00 2001 +From: Hiroshi SHIBATA +Date: Tue, 23 Apr 2024 19:22:22 +0900 +Subject: [PATCH] merge revision(s) 33e5b47c16f1fd3382186e6ffe73cfc6e00946f7: + + Fix handling of reg->dmin in Regex matching + + --- + regexec.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) +--- + regexec.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/regexec.c b/regexec.c +index 4582c35c3f..36ac1d4bc5 100644 +--- a/regexec.c ++++ b/regexec.c +@@ -3900,12 +3900,17 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) + { + UChar *p, *pprev = (UChar* )NULL; ++ size_t input_len = end - str; + + #ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n", + (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range); + #endif + ++ if (reg->dmin > input_len) { ++ return 0; ++ } ++ + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { +@@ -4042,6 +4047,11 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar** low, UChar** high) + { + UChar *p; ++ size_t input_len = end - str; ++ ++ if (reg->dmin > input_len) { ++ return 0; ++ } + + range += reg->dmin; + p = s; diff --git a/SOURCES/ruby-3.4.0-ruby-net-http-Renew-test-certificates.patch b/SOURCES/ruby-3.4.0-ruby-net-http-Renew-test-certificates.patch new file mode 100644 index 0000000..a1554a6 --- /dev/null +++ b/SOURCES/ruby-3.4.0-ruby-net-http-Renew-test-certificates.patch @@ -0,0 +1,236 @@ +From d3933fc753187a055a4904af82f5f3794c88c416 Mon Sep 17 00:00:00 2001 +From: Sorah Fukumori +Date: Mon, 1 Jan 2024 20:45:54 +0900 +Subject: [PATCH] [ruby/net-http] Renew test certificates + +The private key is replaced with a public known test key published at +[RFC 9500]. + +Also lifetime has been extended to 10 years from 4 years. + +[RFC 9500]: https://www.rfc-editor.org/rfc/rfc9500.html + +https://github.com/ruby/net-http/commit/4ab6c4a500 +--- + test/net/fixtures/cacert.pem | 44 ++++++++-------- + test/net/fixtures/server.crt | 99 +++++++----------------------------- + test/net/fixtures/server.key | 55 ++++++++++---------- + 4 files changed, 68 insertions(+), 130 deletions(-) + +diff --git a/test/net/fixtures/cacert.pem b/test/net/fixtures/cacert.pem +index f623bd62ed375..24c83f1c65225 100644 +--- a/test/net/fixtures/cacert.pem ++++ b/test/net/fixtures/cacert.pem +@@ -1,24 +1,24 @@ + -----BEGIN CERTIFICATE----- +-MIID7TCCAtWgAwIBAgIJAIltvxrFAuSnMA0GCSqGSIb3DQEBCwUAMIGMMQswCQYD +-VQQGEwJKUDEQMA4GA1UECAwHU2hpbWFuZTEUMBIGA1UEBwwLTWF0ei1lIGNpdHkx +-FzAVBgNVBAoMDlJ1YnkgQ29yZSBUZWFtMRUwEwYDVQQDDAxSdWJ5IFRlc3QgQ0Ex +-JTAjBgkqhkiG9w0BCQEWFnNlY3VyaXR5QHJ1YnktbGFuZy5vcmcwHhcNMTkwMTAy +-MDI1ODI4WhcNMjQwMTAxMDI1ODI4WjCBjDELMAkGA1UEBhMCSlAxEDAOBgNVBAgM +-B1NoaW1hbmUxFDASBgNVBAcMC01hdHotZSBjaXR5MRcwFQYDVQQKDA5SdWJ5IENv +-cmUgVGVhbTEVMBMGA1UEAwwMUnVieSBUZXN0IENBMSUwIwYJKoZIhvcNAQkBFhZz +-ZWN1cml0eUBydWJ5LWxhbmcub3JnMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +-CgKCAQEAznlbjRVhz1NlutHVrhcGnK8W0qug2ujKXv1njSC4U6nJF6py7I9EeehV +-SaKePyv+I9z3K1LnfUHOtUbdwdKC77yN66A6q2aqzu5q09/NSykcZGOIF0GuItYI +-3nvW3IqBddff2ffsyR+9pBjfb5AIPP08WowF9q4s1eGULwZc4w2B8PFhtxYANd7d +-BvGLXFlcufv9tDtzyRi4t7eqxCRJkZQIZNZ6DHHIJrNxejOILfHLarI12yk8VK6L +-2LG4WgGqyeePiRyd1o1MbuiAFYqAwpXNUbRKg5NaZGwBHZk8UZ+uFKt1QMBURO5R +-WFy1c349jbWszTqFyL4Lnbg9HhAowQIDAQABo1AwTjAdBgNVHQ4EFgQU9tEiKdU9 +-I9derQyc5nWPnc34nVMwHwYDVR0jBBgwFoAU9tEiKdU9I9derQyc5nWPnc34nVMw +-DAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAxj7F/u3C3fgq24N7hGRA +-of7ClFQxGmo/IGT0AISzW3HiVYiFaikKhbO1NwD9aBpD8Zwe62sCqMh8jGV/b0+q +-aOORnWYNy2R6r9FkASAglmdF6xn3bhgGD5ls4pCvcG9FynGnGc24g6MrjFNrBYUS +-2iIZsg36i0IJswo/Dy6HLphCms2BMCD3DeWtfjePUiTmQHJo6HsQIKP/u4N4Fvee +-uMBInei2M4VU74fLXbmKl1F9AEX7JDP3BKSZG19Ch5pnUo4uXM1uNTGsi07P4Y0s +-K44+SKBC0bYEFbDK0eQWMrX3kIhkPxyIWhxdq9/NqPYjShuSEAhA6CSpmRg0pqc+ +-mA== ++MIID+zCCAuOgAwIBAgIUGMvHl3EhtKPKcgc3NQSAYfFuC+8wDQYJKoZIhvcNAQEL ++BQAwgYwxCzAJBgNVBAYTAkpQMRAwDgYDVQQIDAdTaGltYW5lMRQwEgYDVQQHDAtN ++YXR6LWUgY2l0eTEXMBUGA1UECgwOUnVieSBDb3JlIFRlYW0xFTATBgNVBAMMDFJ1 ++YnkgVGVzdCBDQTElMCMGCSqGSIb3DQEJARYWc2VjdXJpdHlAcnVieS1sYW5nLm9y ++ZzAeFw0yNDAxMDExMTQ3MjNaFw0zMzEyMjkxMTQ3MjNaMIGMMQswCQYDVQQGEwJK ++UDEQMA4GA1UECAwHU2hpbWFuZTEUMBIGA1UEBwwLTWF0ei1lIGNpdHkxFzAVBgNV ++BAoMDlJ1YnkgQ29yZSBUZWFtMRUwEwYDVQQDDAxSdWJ5IFRlc3QgQ0ExJTAjBgkq ++hkiG9w0BCQEWFnNlY3VyaXR5QHJ1YnktbGFuZy5vcmcwggEiMA0GCSqGSIb3DQEB ++AQUAA4IBDwAwggEKAoIBAQCw+egZQ6eumJKq3hfKfED4dE/tL4FI5sjqont9ABVI +++1GSqyi1bFBgsRjM0THllIdMbKmJtWwnKW8J+5OgNN8y6Xxv8JmM/Y5vQt2lis0f ++qXmG8UTz0VTWdlAXXmhUs6lSADvAaIe4RVrCsZ97L3ZQTryY7JRVcbB4khUN3Gp0 ++yg+801SXzoFTTa+UGIRLE66jH51aa5VXu99hnv1OiH8tQrjdi8mH6uG/icq4XuIe ++NWMF32wHqIOOPvQcWV3M5D2vxJEj702Ku6k9OQXkAo17qRSEonWW4HtLbtmS8He1 ++JNPc/n3dVUm+fM6NoDXPoLP7j55G9zKyqGtGAWXAj1MTAgMBAAGjUzBRMB0GA1Ud ++DgQWBBSJGVleDvFp9cu9R+E0/OKYzGkwkTAfBgNVHSMEGDAWgBSJGVleDvFp9cu9 ++R+E0/OKYzGkwkTAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBl ++8GLB8skAWlkSw/FwbUmEV3zyqu+p7PNP5YIYoZs0D74e7yVulGQ6PKMZH5hrZmHo ++orFSQU+VUUirG8nDGj7Rzce8WeWBxsaDGC8CE2dq6nC6LuUwtbdMnBrH0LRWAz48 ++jGFF3jHtVz8VsGfoZTZCjukWqNXvU6hETT9GsfU+PZqbqcTVRPH52+XgYayKdIbD ++r97RM4X3+aXBHcUW0b76eyyi65RR/Xtvn8ioZt2AdX7T2tZzJyXJN3Hupp77s6Ui ++AZR35SToHCZeTZD12YBvLBdaTPLZN7O/Q/aAO9ZiJaZ7SbFOjz813B2hxXab4Fob ++2uJX6eMWTVxYK5D4M9lm + -----END CERTIFICATE----- +diff --git a/test/net/fixtures/server.crt b/test/net/fixtures/server.crt +index 5ca78a6d146a0..5d2923795dabc 100644 +--- a/test/net/fixtures/server.crt ++++ b/test/net/fixtures/server.crt +@@ -1,82 +1,21 @@ +-Certificate: +- Data: +- Version: 3 (0x2) +- Serial Number: 2 (0x2) +- Signature Algorithm: sha256WithRSAEncryption +- Issuer: C=JP, ST=Shimane, L=Matz-e city, O=Ruby Core Team, CN=Ruby Test CA/emailAddress=security@ruby-lang.org +- Validity +- Not Before: Jan 2 03:27:13 2019 GMT +- Not After : Jan 1 03:27:13 2024 GMT +- Subject: C=JP, ST=Shimane, O=Ruby Core Team, OU=Ruby Test, CN=localhost +- Subject Public Key Info: +- Public Key Algorithm: rsaEncryption +- Public-Key: (2048 bit) +- Modulus: +- 00:e8:da:9c:01:2e:2b:10:ec:49:cd:5e:07:13:07: +- 9c:70:9e:c6:74:bc:13:c2:e1:6f:c6:82:fd:e3:48: +- e0:2c:a5:68:c7:9e:42:de:60:54:65:e6:6a:14:57: +- 7a:30:d0:cc:b5:b6:d9:c3:d2:df:c9:25:97:54:67: +- cf:f6:be:5e:cb:8b:ee:03:c5:e1:e2:f9:e7:f7:d1: +- 0c:47:f0:b8:da:33:5a:ad:41:ad:e7:b5:a2:7b:b7: +- bf:30:da:60:f8:e3:54:a2:bc:3a:fd:1b:74:d9:dc: +- 74:42:e9:29:be:df:ac:b4:4f:eb:32:f4:06:f1:e1: +- 8c:4b:a8:8b:fb:29:e7:b1:bf:1d:01:ee:73:0f:f9: +- 40:dc:d5:15:79:d9:c6:73:d0:c0:dd:cb:e4:da:19: +- 47:80:c6:14:04:72:fd:9a:7c:8f:11:82:76:49:04: +- 79:cc:f2:5c:31:22:95:13:3e:5d:40:a6:4d:e0:a3: +- 02:26:7d:52:3b:bb:ed:65:a1:0f:ed:6b:b0:3c:d4: +- de:61:15:5e:d3:dd:68:09:9f:4a:57:a5:c2:a9:6d: +- 86:92:c5:f4:a4:d4:b7:13:3b:52:63:24:05:e2:cc: +- e3:8a:3c:d4:35:34:2b:10:bb:58:72:e7:e1:8d:1d: +- 74:8c:61:16:20:3d:d0:1c:4e:8f:6e:fd:fe:64:10: +- 4f:41 +- Exponent: 65537 (0x10001) +- X509v3 extensions: +- X509v3 Basic Constraints: +- CA:FALSE +- Netscape Comment: +- OpenSSL Generated Certificate +- X509v3 Subject Key Identifier: +- ED:28:C2:7E:AB:4B:C8:E8:FE:55:6D:66:95:31:1C:2D:60:F9:02:36 +- X509v3 Authority Key Identifier: +- keyid:F6:D1:22:29:D5:3D:23:D7:5E:AD:0C:9C:E6:75:8F:9D:CD:F8:9D:53 +- +- Signature Algorithm: sha256WithRSAEncryption +- 1d:b8:c5:8b:72:41:20:65:ad:27:6f:15:63:06:26:12:8d:9c: +- ad:ca:f4:db:97:b4:90:cb:ff:35:94:bb:2a:a7:a1:ab:1e:35: +- 2d:a5:3f:c9:24:b0:1a:58:89:75:3e:81:0a:2c:4f:98:f9:51: +- fb:c0:a3:09:d0:0a:9b:e7:a2:b7:c3:60:40:c8:f4:6d:b2:6a: +- 56:12:17:4c:00:24:31:df:9c:60:ae:b1:68:54:a9:e6:b5:4a: +- 04:e6:92:05:86:d9:5a:dc:96:30:a5:58:de:14:99:0f:e5:15: +- 89:3e:9b:eb:80:e3:bd:83:c3:ea:33:35:4b:3e:2f:d3:0d:64: +- 93:67:7f:8d:f5:3f:0c:27:bc:37:5a:cc:d6:47:16:af:5a:62: +- d2:da:51:f8:74:06:6b:24:ad:28:68:08:98:37:7d:ed:0e:ab: +- 1e:82:61:05:d0:ba:75:a0:ab:21:b0:9a:fd:2b:54:86:1d:0d: +- 1f:c2:d4:77:1f:72:26:5e:ad:8a:9f:09:36:6d:44:be:74:c2: +- 5a:3e:ff:5c:9d:75:d6:38:7b:c5:39:f9:44:6e:a1:d1:8e:ff: +- 63:db:c4:bb:c6:91:92:ca:5c:60:9b:1d:eb:0a:de:08:ee:bf: +- da:76:03:65:62:29:8b:f8:7f:c7:86:73:1e:f6:1f:2d:89:69: +- fd:be:bd:6e + -----BEGIN CERTIFICATE----- +-MIID4zCCAsugAwIBAgIBAjANBgkqhkiG9w0BAQsFADCBjDELMAkGA1UEBhMCSlAx +-EDAOBgNVBAgMB1NoaW1hbmUxFDASBgNVBAcMC01hdHotZSBjaXR5MRcwFQYDVQQK +-DA5SdWJ5IENvcmUgVGVhbTEVMBMGA1UEAwwMUnVieSBUZXN0IENBMSUwIwYJKoZI +-hvcNAQkBFhZzZWN1cml0eUBydWJ5LWxhbmcub3JnMB4XDTE5MDEwMjAzMjcxM1oX +-DTI0MDEwMTAzMjcxM1owYDELMAkGA1UEBhMCSlAxEDAOBgNVBAgMB1NoaW1hbmUx +-FzAVBgNVBAoMDlJ1YnkgQ29yZSBUZWFtMRIwEAYDVQQLDAlSdWJ5IFRlc3QxEjAQ +-BgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB +-AOjanAEuKxDsSc1eBxMHnHCexnS8E8Lhb8aC/eNI4CylaMeeQt5gVGXmahRXejDQ +-zLW22cPS38kll1Rnz/a+XsuL7gPF4eL55/fRDEfwuNozWq1Bree1onu3vzDaYPjj +-VKK8Ov0bdNncdELpKb7frLRP6zL0BvHhjEuoi/sp57G/HQHucw/5QNzVFXnZxnPQ +-wN3L5NoZR4DGFARy/Zp8jxGCdkkEeczyXDEilRM+XUCmTeCjAiZ9Uju77WWhD+1r +-sDzU3mEVXtPdaAmfSlelwqlthpLF9KTUtxM7UmMkBeLM44o81DU0KxC7WHLn4Y0d +-dIxhFiA90BxOj279/mQQT0ECAwEAAaN7MHkwCQYDVR0TBAIwADAsBglghkgBhvhC +-AQ0EHxYdT3BlblNTTCBHZW5lcmF0ZWQgQ2VydGlmaWNhdGUwHQYDVR0OBBYEFO0o +-wn6rS8jo/lVtZpUxHC1g+QI2MB8GA1UdIwQYMBaAFPbRIinVPSPXXq0MnOZ1j53N +-+J1TMA0GCSqGSIb3DQEBCwUAA4IBAQAduMWLckEgZa0nbxVjBiYSjZytyvTbl7SQ +-y/81lLsqp6GrHjUtpT/JJLAaWIl1PoEKLE+Y+VH7wKMJ0Aqb56K3w2BAyPRtsmpW +-EhdMACQx35xgrrFoVKnmtUoE5pIFhtla3JYwpVjeFJkP5RWJPpvrgOO9g8PqMzVL +-Pi/TDWSTZ3+N9T8MJ7w3WszWRxavWmLS2lH4dAZrJK0oaAiYN33tDqsegmEF0Lp1 +-oKshsJr9K1SGHQ0fwtR3H3ImXq2Knwk2bUS+dMJaPv9cnXXWOHvFOflEbqHRjv9j +-28S7xpGSylxgmx3rCt4I7r/adgNlYimL+H/HhnMe9h8tiWn9vr1u ++MIIDYTCCAkkCAQAwDQYJKoZIhvcNAQELBQAwgYwxCzAJBgNVBAYTAkpQMRAwDgYD ++VQQIDAdTaGltYW5lMRQwEgYDVQQHDAtNYXR6LWUgY2l0eTEXMBUGA1UECgwOUnVi ++eSBDb3JlIFRlYW0xFTATBgNVBAMMDFJ1YnkgVGVzdCBDQTElMCMGCSqGSIb3DQEJ ++ARYWc2VjdXJpdHlAcnVieS1sYW5nLm9yZzAeFw0yNDAxMDExMTQ3MjNaFw0zMzEy ++MjkxMTQ3MjNaMGAxCzAJBgNVBAYTAkpQMRAwDgYDVQQIDAdTaGltYW5lMRcwFQYD ++VQQKDA5SdWJ5IENvcmUgVGVhbTESMBAGA1UECwwJUnVieSBUZXN0MRIwEAYDVQQD ++DAlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCw+egZ ++Q6eumJKq3hfKfED4dE/tL4FI5sjqont9ABVI+1GSqyi1bFBgsRjM0THllIdMbKmJ ++tWwnKW8J+5OgNN8y6Xxv8JmM/Y5vQt2lis0fqXmG8UTz0VTWdlAXXmhUs6lSADvA ++aIe4RVrCsZ97L3ZQTryY7JRVcbB4khUN3Gp0yg+801SXzoFTTa+UGIRLE66jH51a ++a5VXu99hnv1OiH8tQrjdi8mH6uG/icq4XuIeNWMF32wHqIOOPvQcWV3M5D2vxJEj ++702Ku6k9OQXkAo17qRSEonWW4HtLbtmS8He1JNPc/n3dVUm+fM6NoDXPoLP7j55G ++9zKyqGtGAWXAj1MTAgMBAAEwDQYJKoZIhvcNAQELBQADggEBACtGNdj5TEtnJBYp ++M+LhBeU3oNteldfycEm993gJp6ghWZFg23oX8fVmyEeJr/3Ca9bAgDqg0t9a0npN ++oWKEY6wVKqcHgu3gSvThF5c9KhGbeDDmlTSVVNQmXWX0K2d4lS2cwZHH8mCm2mrY ++PDqlEkSc7k4qSiqigdS8i80Yk+lDXWsm8CjsiC93qaRM7DnS0WPQR0c16S95oM6G ++VklFKUSDAuFjw9aVWA/nahOucjn0w5fVW6lyIlkBslC1ChlaDgJmvhz+Ol3iMsE0 ++kAmFNu2KKPVrpMWaBID49QwQTDyhetNLaVVFM88iUdA9JDoVMEuP1mm39JqyzHTu ++uBrdP4Q= + -----END CERTIFICATE----- +diff --git a/test/net/fixtures/server.key b/test/net/fixtures/server.key +index 7f2380e71e637..6a83d5bcf4a52 100644 +--- a/test/net/fixtures/server.key ++++ b/test/net/fixtures/server.key +@@ -1,28 +1,27 @@ +------BEGIN PRIVATE KEY----- +-MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDo2pwBLisQ7EnN +-XgcTB5xwnsZ0vBPC4W/Ggv3jSOAspWjHnkLeYFRl5moUV3ow0My1ttnD0t/JJZdU +-Z8/2vl7Li+4DxeHi+ef30QxH8LjaM1qtQa3ntaJ7t78w2mD441SivDr9G3TZ3HRC +-6Sm+36y0T+sy9Abx4YxLqIv7Keexvx0B7nMP+UDc1RV52cZz0MDdy+TaGUeAxhQE +-cv2afI8RgnZJBHnM8lwxIpUTPl1Apk3gowImfVI7u+1loQ/ta7A81N5hFV7T3WgJ +-n0pXpcKpbYaSxfSk1LcTO1JjJAXizOOKPNQ1NCsQu1hy5+GNHXSMYRYgPdAcTo9u +-/f5kEE9BAgMBAAECggEBAOHkwhc7DLh8IhTDNSW26oMu5OP2WU1jmiYAigDmf+OQ +-DBgrZj+JQBci8qINQxL8XLukSZn5hvQCLc7Kbyu1/wyEEUFDxSGGwwzclodr9kho +-LX2LDASPZrOSzD2+fPi2wTKmXKuS6Uc44OjQfZkYMNkz9r4Vkm8xGgOD3VipjIYX +-QXlhhdqkXZcNABsihCV52GKkDFSVm8jv95YJc5xhoYCy/3a4/qPdF0aT2R7oYUej +-hKrxVDskyooe8Zg/JTydZNV5GQEDmW01/K3r6XGT26oPi1AqMU1gtv/jkW56CRQQ +-1got8smnqM+AV7Slf9R6DauIPdQJ2S8wsr/o8ISBsOECgYEA9YrqEP2gAYSGFXRt +-liw0WI2Ant8BqXS6yvq1jLo/qWhLw/ph4Di73OQ2mpycVTpgfGr2wFPQR1XJ+0Fd +-U+Ir/C3Q7FK4VIGHK7B0zNvZr5tEjlFfeRezo2JMVw5YWeSagIFcSwK+KqCTH9qc +-pw/Eb8nB/4XNcpTZu7Fg0Wc+ooUCgYEA8sVaicn1Wxkpb45a4qfrA6wOr5xdJ4cC +-A5qs7vjX2OdPIQOmoQhdI7bCWFXZzF33wA4YCws6j5wRaySLIJqdms8Gl9QnODy1 +-ZlA5gwKToBC/jqPmWAXSKb8EH7cHilaxU9OKnQ7CfwlGLHqjMtjrhR7KHlt3CVRs +-oRmvsjZVXI0CgYAmPedslAO6mMhFSSfULrhMXmV82OCqYrrA6EEkVNGbcdnzAOkD +-gfKIWabDd8bFY10po4Mguy0CHzNhBXIioWQWV5BlbhC1YKMLw+S9DzSdLAKGY9gJ +-xQ4+UQ3wtRQ/k+IYR413RUsW2oFvgZ3KSyNeAb9MK6uuv84VdG/OzVSs/QKBgQDn +-kap//l2EbObiWyaERunckdVcW0lcN+KK75J/TGwPoOwQsLvTpPe65kxRGGrtDsEQ +-uCDk/+v3KkZPLgdrrTAih9FhJ+PVN8tMcb+6IM4SA4fFFr/UPJEwct0LJ3oQ0grJ +-y+HPWFHb/Uurh7t99/4H98uR02sjQh1wOeEmm78mzQKBgQDm+LzGH0se6CXQ6cdZ +-g1JRZeXkDEsrW3hfAsW62xJQmXcWxBoblP9OamMY+A06rM5og3JbDk5Zm6JsOaA8 +-wS2gw4ilp46jors4eQey8ux7kB9LzdBoDBBElnsbjLO8oBNZlVcYXg+6BOl/CUi7 +-2whRF0FEjKA8ehrNhAq+VFfFNw== +------END PRIVATE KEY----- ++-----BEGIN RSA PRIVATE KEY----- ++MIIEowIBAAKCAQEAsPnoGUOnrpiSqt4XynxA+HRP7S+BSObI6qJ7fQAVSPtRkqso ++tWxQYLEYzNEx5ZSHTGypibVsJylvCfuToDTfMul8b/CZjP2Ob0LdpYrNH6l5hvFE ++89FU1nZQF15oVLOpUgA7wGiHuEVawrGfey92UE68mOyUVXGweJIVDdxqdMoPvNNU ++l86BU02vlBiESxOuox+dWmuVV7vfYZ79Toh/LUK43YvJh+rhv4nKuF7iHjVjBd9s ++B6iDjj70HFldzOQ9r8SRI+9NirupPTkF5AKNe6kUhKJ1luB7S27ZkvB3tSTT3P59 ++3VVJvnzOjaA1z6Cz+4+eRvcysqhrRgFlwI9TEwIDAQABAoIBAEEYiyDP29vCzx/+ ++dS3LqnI5BjUuJhXUnc6AWX/PCgVAO+8A+gZRgvct7PtZb0sM6P9ZcLrweomlGezI ++FrL0/6xQaa8bBr/ve/a8155OgcjFo6fZEw3Dz7ra5fbSiPmu4/b/kvrg+Br1l77J ++aun6uUAs1f5B9wW+vbR7tzbT/mxaUeDiBzKpe15GwcvbJtdIVMa2YErtRjc1/5B2 ++BGVXyvlJv0SIlcIEMsHgnAFOp1ZgQ08aDzvilLq8XVMOahAhP1O2A3X8hKdXPyrx ++IVWE9bS9ptTo+eF6eNl+d7htpKGEZHUxinoQpWEBTv+iOoHsVunkEJ3vjLP3lyI/ ++fY0NQ1ECgYEA3RBXAjgvIys2gfU3keImF8e/TprLge1I2vbWmV2j6rZCg5r/AS0u ++pii5CvJ5/T5vfJPNgPBy8B/yRDs+6PJO1GmnlhOkG9JAIPkv0RBZvR0PMBtbp6nT ++Y3yo1lwamBVBfY6rc0sLTzosZh2aGoLzrHNMQFMGaauORzBFpY5lU50CgYEAzPHl ++u5DI6Xgep1vr8QvCUuEesCOgJg8Yh1UqVoY/SmQh6MYAv1I9bLGwrb3WW/7kqIoD ++fj0aQV5buVZI2loMomtU9KY5SFIsPV+JuUpy7/+VE01ZQM5FdY8wiYCQiVZYju9X ++Wz5LxMNoz+gT7pwlLCsC4N+R8aoBk404aF1gum8CgYAJ7VTq7Zj4TFV7Soa/T1eE ++k9y8a+kdoYk3BASpCHJ29M5R2KEA7YV9wrBklHTz8VzSTFTbKHEQ5W5csAhoL5Fo ++qoHzFFi3Qx7MHESQb9qHyolHEMNx6QdsHUn7rlEnaTTyrXh3ifQtD6C0yTmFXUIS ++CW9wKApOrnyKJ9nI0HcuZQKBgQCMtoV6e9VGX4AEfpuHvAAnMYQFgeBiYTkBKltQ ++XwozhH63uMMomUmtSG87Sz1TmrXadjAhy8gsG6I0pWaN7QgBuFnzQ/HOkwTm+qKw ++AsrZt4zeXNwsH7QXHEJCFnCmqw9QzEoZTrNtHJHpNboBuVnYcoueZEJrP8OnUG3r ++UjmopwKBgAqB2KYYMUqAOvYcBnEfLDmyZv9BTVNHbR2lKkMYqv5LlvDaBxVfilE0 ++2riO4p6BaAdvzXjKeRrGNEKoHNBpOSfYCOM16NjL8hIZB1CaV3WbT5oY+jp7Mzd5 ++7d56RZOE+ERK2uz/7JX9VSsM/LbH9pJibd4e8mikDS9ntciqOH/3 ++-----END RSA PRIVATE KEY----- diff --git a/SOURCES/rubygem-rexml-3.2.9-Fix-CVE-2024-35176-DoS-in-REXML.patch b/SOURCES/rubygem-rexml-3.2.9-Fix-CVE-2024-35176-DoS-in-REXML.patch new file mode 100644 index 0000000..bf6d96f --- /dev/null +++ b/SOURCES/rubygem-rexml-3.2.9-Fix-CVE-2024-35176-DoS-in-REXML.patch @@ -0,0 +1,1662 @@ +From 694239f0855668c986feba6f1b395ecd94a1f0bc Mon Sep 17 00:00:00 2001 +From: Kouhei Sutou +Date: Fri, 28 Sep 2018 16:29:17 +0900 +Subject: [PATCH 1/7] Make PI parsing more robust + +PI target must be "NAME - (('X' | 'x') ('M' | 'm') ('L' | 'l'))". So +'' is invalid because "=" isn't included in NAME. + +Without this change, 'pos="3"' was accepted as PI target. + +=== +Commit implements #process_instruction method for baseparser +that later patches rely upon. + +https://github.com/ruby/rexml/commit/694239f0855668c986feba6f1b395ecd94a1f0bc +--- + lib/rexml/parsers/baseparser.rb | 18 ++++++++----- + test/rexml/data/t75.xml | 2 +- + .../parse/test_processing_instruction.rb | 25 +++++++++++++++++++ + 3 files changed, 38 insertions(+), 7 deletions(-) + create mode 100644 test/rexml/parse/test_processing_instruction.rb + +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb +index e7ef695912..84a0bcdcec 100644 +--- a/lib/rexml/parsers/baseparser.rb ++++ b/lib/rexml/parsers/baseparser.rb +@@ -61,7 +61,7 @@ class BaseParser + XMLDECL_START = /\A<\?xml\s/u; + XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um + INSTRUCTION_START = /\A<\?/u +- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um ++ INSTRUCTION_PATTERN = /<\?#{QNAME}(\s+.*?)?\?>/um + TAG_MATCH = /\A<((?>#{QNAME_STR}))/um + CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um + +@@ -229,7 +229,7 @@ def pull_event + standalone = standalone[1] unless standalone.nil? + return [ :xmldecl, version, encoding, standalone ] + when INSTRUCTION_START +- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] ++ return process_instruction + when DOCTYPE_START + base_error_message = "Malformed DOCTYPE" + @source.match(DOCTYPE_START, true) +@@ -395,10 +395,7 @@ def pull_event + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) + elsif @source.buffer[1] == ?? +- md = @source.match( INSTRUCTION_PATTERN, true ) +- return [ :processing_instruction, md[1], md[2] ] if md +- raise REXML::ParseException.new( "Bad instruction declaration", +- @source) ++ return process_instruction + else + # Get the next tag + md = @source.match(TAG_MATCH, true) +@@ -675,6 +672,15 @@ def parse_attributes(prefixes, curr_ns) + end + return attributes, closed + end ++ ++ def process_instruction ++ match_data = @source.match(INSTRUCTION_PATTERN, true) ++ unless match_data ++ message = "Invalid processing instruction node" ++ raise REXML::ParseException.new(message, @source) ++ end ++ [:processing_instruction, match_data[1], match_data[4]] ++ end + end + end + end +diff --git a/test/rexml/data/t75.xml b/test/rexml/data/t75.xml +index 0911fb1b1a..eb3cccee4b 100644 +--- a/test/rexml/data/t75.xml ++++ b/test/rexml/data/t75.xml +@@ -1,4 +1,4 @@ +- ++ + + + ") ++ end ++ assert_equal(<<-DETAIL.chomp, exception.to_s) ++Invalid processing instruction node ++Line: 1 ++Position: 4 ++Last 80 unconsumed characters: ++ ++ DETAIL ++ end ++ end ++ end ++end + +From 810d2285235d5501a0a124f300832e6e9515da3c Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Wed, 17 Jan 2024 15:32:57 +0900 +Subject: [PATCH 2/7] Use string scanner with baseparser (#105) + +Using StringScanner reduces the string copying process and speeds up the +process. + +And I removed unnecessary methods. + +https://github.com/ruby/rexml/actions/runs/7549990000/job/20554906140?pr=105 + +``` +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [x86_64-linux] +Calculating ------------------------------------- + rexml 3.2.6 master 3.2.6(YJIT) master(YJIT) + dom 4.868 5.077 8.137 8.303 i/s - 100.000 times in 20.540529s 19.696590s 12.288900s 12.043666s + sax 13.597 13.953 19.206 20.948 i/s - 100.000 times in 7.354343s 7.167142s 5.206745s 4.773765s + pull 15.641 16.918 22.266 25.378 i/s - 100.000 times in 6.393424s 5.910955s 4.491201s 3.940471s + stream 14.339 15.844 19.810 22.206 i/s - 100.000 times in 6.973856s 6.311350s 5.047957s 4.503244s + +Comparison: + dom + master(YJIT): 8.3 i/s + 3.2.6(YJIT): 8.1 i/s - 1.02x slower + master: 5.1 i/s - 1.64x slower + rexml 3.2.6: 4.9 i/s - 1.71x slower + + sax + master(YJIT): 20.9 i/s + 3.2.6(YJIT): 19.2 i/s - 1.09x slower + master: 14.0 i/s - 1.50x slower + rexml 3.2.6: 13.6 i/s - 1.54x slower + + pull + master(YJIT): 25.4 i/s + 3.2.6(YJIT): 22.3 i/s - 1.14x slower + master: 16.9 i/s - 1.50x slower + rexml 3.2.6: 15.6 i/s - 1.62x slower + + stream + master(YJIT): 22.2 i/s + 3.2.6(YJIT): 19.8 i/s - 1.12x slower + master: 15.8 i/s - 1.40x slower + rexml 3.2.6: 14.3 i/s - 1.55x slower +``` + +- YJIT=ON : 1.02x - 1.14x faster +- YJIT=OFF : 1.02x - 1.10x faster + +--------- + +Co-authored-by: Sutou Kouhei + +=== +The PR focuses on enhancements in lib/rexml/source.rb. +StringScanner is used instead of just a string for scanning and +matching. +This behavior is relied upon in later patches. + +Preparation for fix for CVE-2024-35176. + +https://github.com/ruby/rexml/commit/810d2285235d5501a0a124f300832e6e9515da3c +--- + lib/rexml/parsers/baseparser.rb | 21 ++- + lib/rexml/source.rb | 149 ++++++-------------- + test/rexml/parse/test_entity_declaration.rb | 36 +++++ + test/rexml/test_core.rb | 2 +- + 4 files changed, 93 insertions(+), 115 deletions(-) + create mode 100644 test/rexml/parse/test_entity_declaration.rb + +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb +index 84a0bcdcec..c29783af5c 100644 +--- a/lib/rexml/parsers/baseparser.rb ++++ b/lib/rexml/parsers/baseparser.rb +@@ -98,7 +98,7 @@ class BaseParser + ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" + PEDECL = "" + GEDECL = "" +- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um ++ ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um + + NOTATIONDECL_START = /\A\s*0 +- rv +- end +- + def read + end + +- def consume( pattern ) +- @buffer = $' if pattern.match( @buffer ) +- end +- +- def match_to( char, pattern ) +- return pattern.match(@buffer) +- end +- +- def match_to_consume( char, pattern ) +- md = pattern.match(@buffer) +- @buffer = $' +- return md +- end +- + def match(pattern, cons=false) +- md = pattern.match(@buffer) +- @buffer = $' if cons and md +- return md ++ if cons ++ @scanner.scan(pattern).nil? ? nil : @scanner ++ else ++ @scanner.check(pattern).nil? ? nil : @scanner ++ end + end + + # @return true if the Source is exhausted + def empty? +- @buffer == "" +- end +- +- def position +- @orig.index( @buffer ) ++ @scanner.eos? + end + + # @return the current line in the source + def current_line + lines = @orig.split +- res = lines.grep @buffer[0..30] ++ res = lines.grep @scanner.rest[0..30] + res = res[-1] if res.kind_of? Array + lines.index( res ) if res + end + + private ++ + def detect_encoding +- buffer_encoding = @buffer.encoding ++ scanner_encoding = @scanner.rest.encoding + detected_encoding = "UTF-8" + begin +- @buffer.force_encoding("ASCII-8BIT") +- if @buffer[0, 2] == "\xfe\xff" +- @buffer[0, 2] = "" ++ @scanner.string.force_encoding("ASCII-8BIT") ++ if @scanner.scan(/\xfe\xff/n) + detected_encoding = "UTF-16BE" +- elsif @buffer[0, 2] == "\xff\xfe" +- @buffer[0, 2] = "" ++ elsif @scanner.scan(/\xff\xfe/n) + detected_encoding = "UTF-16LE" +- elsif @buffer[0, 3] == "\xef\xbb\xbf" +- @buffer[0, 3] = "" ++ elsif @scanner.scan(/\xef\xbb\xbf/n) + detected_encoding = "UTF-8" + end + ensure +- @buffer.force_encoding(buffer_encoding) ++ @scanner.string.force_encoding(scanner_encoding) + end + self.encoding = detected_encoding + end + + def encoding_updated + if @encoding != 'UTF-8' +- @buffer = decode(@buffer) ++ @scanner.string = decode(@scanner.rest) + @to_utf = true + else + @to_utf = false +- @buffer.force_encoding ::Encoding::UTF_8 ++ @scanner.string.force_encoding(::Encoding::UTF_8) + end + end + end +@@ -172,7 +138,7 @@ def initialize(arg, block_size=500, encoding=nil) + end + + if !@to_utf and +- @buffer.respond_to?(:force_encoding) and ++ @orig.respond_to?(:force_encoding) and + @source.respond_to?(:external_encoding) and + @source.external_encoding != ::Encoding::UTF_8 + @force_utf8 = true +@@ -181,65 +147,44 @@ def initialize(arg, block_size=500, encoding=nil) + end + end + +- def scan(pattern, cons=false) +- rv = super +- # You'll notice that this next section is very similar to the same +- # section in match(), but just a liiittle different. This is +- # because it is a touch faster to do it this way with scan() +- # than the way match() does it; enough faster to warrant duplicating +- # some code +- if rv.size == 0 +- until @buffer =~ pattern or @source.nil? +- begin +- @buffer << readline +- rescue Iconv::IllegalSequence +- raise +- rescue +- @source = nil +- end +- end +- rv = super +- end +- rv.taint +- rv +- end +- + def read + begin +- @buffer << readline ++ # NOTE: `@scanner << readline` does not free memory, so when parsing huge XML in JRuby's DOM, ++ # out-of-memory error `Java::JavaLang::OutOfMemoryError: Java heap space` occurs. ++ # `@scanner.string = @scanner.rest + readline` frees memory that is already consumed ++ # and avoids this problem. ++ @scanner.string = @scanner.rest + readline + rescue Exception, NameError + @source = nil + end + end + +- def consume( pattern ) +- match( pattern, true ) +- end +- + def match( pattern, cons=false ) +- rv = pattern.match(@buffer) +- @buffer = $' if cons and rv +- while !rv and @source ++ if cons ++ md = @scanner.scan(pattern) ++ else ++ md = @scanner.check(pattern) ++ end ++ while md.nil? and @source + begin +- @buffer << readline +- rv = pattern.match(@buffer) +- @buffer = $' if cons and rv ++ @scanner << readline ++ if cons ++ md = @scanner.scan(pattern) ++ else ++ md = @scanner.check(pattern) ++ end + rescue + @source = nil + end + end +- rv.taint +- rv ++ ++ md.nil? ? nil : @scanner + end + + def empty? + super and ( @source.nil? || @source.eof? ) + end + +- def position +- @er_source.pos rescue 0 +- end +- + # @return the current line in the source + def current_line + begin +@@ -289,7 +234,7 @@ def encoding_updated + @source.set_encoding(@encoding, @encoding) + end + @line_break = encode(">") +- @pending_buffer, @buffer = @buffer, "" ++ @pending_buffer, @scanner.string = @scanner.rest, "" + @pending_buffer.force_encoding(@encoding) + super + end +diff --git a/test/rexml/parse/test_entity_declaration.rb b/test/rexml/parse/test_entity_declaration.rb +new file mode 100644 +index 0000000000..e15deec60d +--- /dev/null ++++ b/test/rexml/parse/test_entity_declaration.rb +@@ -0,0 +1,36 @@ ++# frozen_string_literal: false ++require 'test/unit' ++require 'rexml/document' ++ ++module REXMLTests ++ class TestParseEntityDeclaration < Test::Unit::TestCase ++ private ++ def xml(internal_subset) ++ <<-XML ++ ++ ++ XML ++ end ++ ++ def parse(internal_subset) ++ REXML::Document.new(xml(internal_subset)).doctype ++ end ++ ++ def test_empty ++ exception = assert_raise(REXML::ParseException) do ++ parse(<<-INTERNAL_SUBSET) ++ ++ INTERNAL_SUBSET ++ end ++ assert_equal(<<-DETAIL.chomp, exception.to_s) ++Malformed notation declaration: name is missing ++Line: 5 ++Position: 72 ++Last 80 unconsumed characters: ++ ]> ++ DETAIL ++ end ++ end ++end +diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb +index ee5438d5e5..d4ece491b9 100644 +--- a/test/rexml/test_core.rb ++++ b/test/rexml/test_core.rb +@@ -681,7 +681,7 @@ def test_iso_8859_1_output_function + koln_iso_8859_1 = "K\xF6ln" + koln_utf8 = "K\xc3\xb6ln" + source = Source.new( koln_iso_8859_1, 'iso-8859-1' ) +- results = source.scan(/.*/)[0] ++ results = source.match(/.*/)[0] + koln_utf8.force_encoding('UTF-8') if koln_utf8.respond_to?(:force_encoding) + assert_equal koln_utf8, results + output << results + +From 77128555476cb0db798e2912fb3a07d6411dc320 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sun, 21 Jan 2024 20:02:00 +0900 +Subject: [PATCH 3/7] Use `@scanner << readline` instead of `@scanner.string = + @scanner.rest + readline` (#107) + +## Why + +JRuby's `StringScanner#<<` and `StringScanner#scan` OutOfMemoryError has +been resolved in strscan gem 3.0.9. + +https://github.com/ruby/strscan/issues/83 + +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.958 11.044 16.615 16.783 i/s - 100.000 times in 9.126104s 9.055023s 6.018799s 5.958437s + sax 29.624 29.609 44.390 45.370 i/s - 100.000 times in 3.375641s 3.377372s 2.252774s 2.204080s + pull 33.868 34.695 51.173 53.492 i/s - 100.000 times in 2.952679s 2.882229s 1.954138s 1.869422s + stream 31.719 32.351 43.604 45.403 i/s - 100.000 times in 3.152713s 3.091052s 2.293356s 2.202514s + +Comparison: + dom + after(YJIT): 16.8 i/s + before(YJIT): 16.6 i/s - 1.01x slower + after: 11.0 i/s - 1.52x slower + before: 11.0 i/s - 1.53x slower + + sax + after(YJIT): 45.4 i/s + before(YJIT): 44.4 i/s - 1.02x slower + before: 29.6 i/s - 1.53x slower + after: 29.6 i/s - 1.53x slower + + pull + after(YJIT): 53.5 i/s + before(YJIT): 51.2 i/s - 1.05x slower + after: 34.7 i/s - 1.54x slower + before: 33.9 i/s - 1.58x slower + + stream + after(YJIT): 45.4 i/s + before(YJIT): 43.6 i/s - 1.04x slower + after: 32.4 i/s - 1.40x slower + before: 31.7 i/s - 1.43x slower + +``` + +- YJIT=ON : 1.01x - 1.05x faster +- YJIT=OFF : 1.00x - 1.02x faster +=== +Backported, as the PR #114 and mainly PR #126 count with the method using `<<` +instead of setting the scanner's string directly. + +https://github.com/ruby/rexml/commit/77128555476cb0db798e2912fb3a07d6411dc320 +--- + lib/rexml/source.rb | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb +index 9883e79210..4624777fa0 100644 +--- a/lib/rexml/source.rb ++++ b/lib/rexml/source.rb +@@ -149,11 +149,7 @@ def initialize(arg, block_size=500, encoding=nil) + + def read + begin +- # NOTE: `@scanner << readline` does not free memory, so when parsing huge XML in JRuby's DOM, +- # out-of-memory error `Java::JavaLang::OutOfMemoryError: Java heap space` occurs. +- # `@scanner.string = @scanner.rest + readline` frees memory that is already consumed +- # and avoids this problem. +- @scanner.string = @scanner.rest + readline ++ @scanner << readline + rescue Exception, NameError + @source = nil + end + +From 370666e314816b57ecd5878e757224c3b6bc93f5 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Tue, 27 Feb 2024 09:48:35 +0900 +Subject: [PATCH 4/7] Use more StringScanner based API to parse XML (#114) + +## Why? + +Improve maintainability by optimizing the process so that the parsing +process proceeds using StringScanner#scan. + +## Changed +- Change `REXML::Parsers::BaseParser` from `frozen_string_literal: +false` to `frozen_string_literal: true`. +- Added `Source#string=` method for error message output. +- Added TestParseDocumentTypeDeclaration#test_no_name test case. +- Of the `intSubset` of DOCTYPE, " + +=== +This commit introduces wider use of StringScanner in +lib/rexml/parsers/baseparse.rb. + +Which is relied upon in later patches. + +MISSING_ATTRIBUTE_QUOTES is not present in upstream +lib/rexml/parsers/baseparser.rb at this point. +While it doesn't seem to be used, this patch does not +remove the constant so let's keep it around. + +https://github.com/ruby/rexml/commit/370666e314816b57ecd5878e757224c3b6bc93f5 +--- + lib/rexml/parsers/baseparser.rb | 328 ++++++++++-------- + lib/rexml/source.rb | 31 +- + .../parse/test_document_type_declaration.rb | 15 + + 3 files changed, 205 insertions(+), 169 deletions(-) + +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb +index c29783af5c..dafc1b1449 100644 +--- a/lib/rexml/parsers/baseparser.rb ++++ b/lib/rexml/parsers/baseparser.rb +@@ -1,4 +1,4 @@ +-# frozen_string_literal: false ++# frozen_string_literal: true + + require "strscan" + +@@ -121,6 +121,19 @@ class BaseParser + ###################################################################### + MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um + ++ module Private ++ INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um ++ TAG_PATTERN = /((?>#{QNAME_STR}))/um ++ CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ++ ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um ++ NAME_PATTERN = /\s*#{NAME}/um ++ GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" ++ PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" ++ ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um ++ end ++ private_constant :Private ++ include Private ++ + def initialize( source ) + self.stream = source + @listeners = [] +@@ -206,180 +219,172 @@ def pull_event + #STDERR.puts @source.encoding + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" + if @document_status == nil +- word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um ) +- word = word[1] unless word.nil? +- #STDERR.puts "WORD = #{word.inspect}" +- case word +- when COMMENT_START +- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] +- when XMLDECL_START +- #STDERR.puts "XMLDECL" +- results = @source.match( XMLDECL_PATTERN, true )[1] +- version = VERSION.match( results ) +- version = version[1] unless version.nil? +- encoding = ENCODING.match(results) +- encoding = encoding[1] unless encoding.nil? +- if need_source_encoding_update?(encoding) +- @source.encoding = encoding +- end +- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding +- encoding = "UTF-16" +- end +- standalone = STANDALONE.match(results) +- standalone = standalone[1] unless standalone.nil? +- return [ :xmldecl, version, encoding, standalone ] +- when INSTRUCTION_START ++ if @source.match("/um, true) +- id = [nil, nil, nil] +- @document_status = :after_doctype +- else +- id = parse_id(base_error_message, +- accept_external_id: true, +- accept_public_id: false) +- if id[0] == "SYSTEM" +- # For backward compatibility +- id[1], id[2] = id[2], nil ++ elsif @source.match("/um, true)[1] ] ++ elsif @source.match("DOCTYPE", true) ++ base_error_message = "Malformed DOCTYPE" ++ unless @source.match(/\s+/um, true) ++ if @source.match(">") ++ message = "#{base_error_message}: name is missing" ++ else ++ message = "#{base_error_message}: invalid name" ++ end ++ @source.string = "/um, true) ++ @nsstack.unshift(curr_ns=Set.new) ++ name = parse_name(base_error_message) ++ if @source.match(/\s*\[/um, true) ++ id = [nil, nil, nil] ++ @document_status = :in_doctype ++ elsif @source.match(/\s*>/um, true) ++ id = [nil, nil, nil] + @document_status = :after_doctype + else +- message = "#{base_error_message}: garbage after external ID" +- raise REXML::ParseException.new(message, @source) ++ id = parse_id(base_error_message, ++ accept_external_id: true, ++ accept_public_id: false) ++ if id[0] == "SYSTEM" ++ # For backward compatibility ++ id[1], id[2] = id[2], nil ++ end ++ if @source.match(/\s*\[/um, true) ++ @document_status = :in_doctype ++ elsif @source.match(/\s*>/um, true) ++ @document_status = :after_doctype ++ else ++ message = "#{base_error_message}: garbage after external ID" ++ raise REXML::ParseException.new(message, @source) ++ end + end +- end +- args = [:start_doctype, name, *id] +- if @document_status == :after_doctype +- @source.match(/\A\s*/um, true) +- @stack << [ :end_doctype ] +- end +- return args +- when /\A\s+/ +- else +- @document_status = :after_doctype +- if @source.encoding == "UTF-8" +- @source.buffer_encoding = ::Encoding::UTF_8 ++ args = [:start_doctype, name, *id] ++ if @document_status == :after_doctype ++ @source.match(/\s*/um, true) ++ @stack << [ :end_doctype ] ++ end ++ return args ++ else ++ message = "Invalid XML" ++ raise REXML::ParseException.new(message, @source) + end + end + end + if @document_status == :in_doctype +- md = @source.match(/\A\s*(.*?>)/um) +- case md[1] +- when SYSTEMENTITY +- match = @source.match( SYSTEMENTITY, true )[1] +- return [ :externalentity, match ] +- +- when ELEMENTDECL_START +- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] +- +- when ENTITY_START +- match = [:entitydecl, *@source.match( ENTITYDECL, true ).captures.compact] +- ref = false +- if match[1] == '%' +- ref = true +- match.delete_at 1 +- end +- # Now we have to sort out what kind of entity reference this is +- if match[2] == 'SYSTEM' +- # External reference +- match[3] = match[3][1..-2] # PUBID +- match.delete_at(4) if match.size > 4 # Chop out NDATA decl +- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ] +- elsif match[2] == 'PUBLIC' +- # External reference +- match[3] = match[3][1..-2] # PUBID +- match[4] = match[4][1..-2] # HREF +- match.delete_at(5) if match.size > 5 # Chop out NDATA decl +- # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] +- else +- match[2] = match[2][1..-2] +- match.pop if match.size == 4 +- # match is [ :entity, name, value ] +- end +- match << '%' if ref +- return match +- when ATTLISTDECL_START +- md = @source.match( ATTLISTDECL_PATTERN, true ) +- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? +- element = md[1] +- contents = md[0] +- +- pairs = {} +- values = md[0].scan( ATTDEF_RE ) +- values.each do |attdef| +- unless attdef[3] == "#IMPLIED" +- attdef.compact! +- val = attdef[3] +- val = attdef[4] if val == "#FIXED " +- pairs[attdef[0]] = val +- if attdef[0] =~ /^xmlns:(.*)/ +- @nsstack[0] << $1 +- end ++ @source.match(/\s*/um, true) # skip spaces ++ if @source.match("/um, true) ++ raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? ++ return [ :elementdecl, "/um) +- message = "#{base_error_message}: name is missing" ++ # Now we have to sort out what kind of entity reference this is ++ if match[2] == 'SYSTEM' ++ # External reference ++ match[3] = match[3][1..-2] # PUBID ++ match.delete_at(4) if match.size > 4 # Chop out NDATA decl ++ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ] ++ elsif match[2] == 'PUBLIC' ++ # External reference ++ match[3] = match[3][1..-2] # PUBID ++ match[4] = match[4][1..-2] # HREF ++ match.delete_at(5) if match.size > 5 # Chop out NDATA decl ++ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] + else +- message = "#{base_error_message}: invalid declaration name" ++ match[2] = match[2][1..-2] ++ match.pop if match.size == 4 ++ # match is [ :entity, name, value ] + end +- raise REXML::ParseException.new(message, @source) +- end +- name = parse_name(base_error_message) +- id = parse_id(base_error_message, +- accept_external_id: true, +- accept_public_id: true) +- unless @source.match(/\A\s*>/um, true) +- message = "#{base_error_message}: garbage before end >" +- raise REXML::ParseException.new(message, @source) ++ match << '%' if ref ++ return match ++ elsif @source.match("ATTLIST", true) ++ md = @source.match(ATTLISTDECL_END, true) ++ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? ++ element = md[1] ++ contents = md[0] ++ ++ pairs = {} ++ values = md[0].scan( ATTDEF_RE ) ++ values.each do |attdef| ++ unless attdef[3] == "#IMPLIED" ++ attdef.compact! ++ val = attdef[3] ++ val = attdef[4] if val == "#FIXED " ++ pairs[attdef[0]] = val ++ if attdef[0] =~ /^xmlns:(.*)/ ++ @nsstack[0] << $1 ++ end ++ end ++ end ++ return [ :attlistdecl, element, pairs, contents ] ++ elsif @source.match("NOTATION", true) ++ base_error_message = "Malformed notation declaration" ++ unless @source.match(/\s+/um, true) ++ if @source.match(">") ++ message = "#{base_error_message}: name is missing" ++ else ++ message = "#{base_error_message}: invalid name" ++ end ++ @source.string = " /um, true) ++ message = "#{base_error_message}: garbage before end >" ++ raise REXML::ParseException.new(message, @source) ++ end ++ return [:notationdecl, name, *id] ++ elsif md = @source.match(/--(.*?)-->/um, true) ++ case md[1] ++ when /--/, /-\z/ ++ raise REXML::ParseException.new("Malformed comment", @source) ++ end ++ return [ :comment, md[1] ] if md + end +- return [:notationdecl, name, *id] +- when DOCTYPE_END ++ elsif match = @source.match(/(%.*?;)\s*/um, true) ++ return [ :externalentity, match[1] ] ++ elsif @source.match(/\]\s*>/um, true) + @document_status = :after_doctype +- @source.match( DOCTYPE_END, true ) + return [ :end_doctype ] + end + end + if @document_status == :after_doctype +- @source.match(/\A\s*/um, true) ++ @source.match(/\s*/um, true) + end + begin +- @source.read if @source.buffer.size<2 +- if @source.buffer[0] == ?< +- if @source.buffer[1] == ?/ ++ if @source.match("<", true) ++ if @source.match("/", true) + @nsstack.shift + last_tag = @tags.pop +- #md = @source.match_to_consume( '>', CLOSE_MATCH) +- md = @source.match( CLOSE_MATCH, true ) ++ md = @source.match(CLOSE_PATTERN, true) + if md and !last_tag + message = "Unexpected top-level end tag (got '#{md[1]}')" + raise REXML::ParseException.new(message, @source) + end + if md.nil? or last_tag != md[1] + message = "Missing end tag for '#{last_tag}'" +- message << " (got '#{md[1]}')" if md ++ message += " (got '#{md[1]}')" if md ++ @source.string = "]*>)/um) ++ elsif @source.match("!", true) ++ md = @source.match(/([^>]*>)/um) + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + raise REXML::ParseException.new("Malformed node", @source) unless md +- if md[0][2] == ?- +- md = @source.match( COMMENT_PATTERN, true ) ++ if md[0][0] == ?- ++ md = @source.match(/--(.*?)-->/um, true) + + case md[1] + when /--/, /-\z/ +@@ -388,19 +393,18 @@ def pull_event + + return [ :comment, md[1] ] if md + else +- md = @source.match( CDATA_PATTERN, true ) ++ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) + return [ :cdata, md[1] ] if md + end + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) +- elsif @source.buffer[1] == ?? ++ elsif @source.match("?", true) + return process_instruction + else + # Get the next tag +- md = @source.match(TAG_MATCH, true) ++ md = @source.match(TAG_PATTERN, true) + unless md +- # Check for missing attribute quotes +- raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) ++ @source.string = "<" + @source.buffer + raise REXML::ParseException.new("malformed XML: missing tag start", @source) + end + tag = md[1] +@@ -425,7 +429,7 @@ def pull_event + return [ :start_element, tag, attributes ] + end + else +- md = @source.match( TEXT_PATTERN, true ) ++ md = @source.match(/([^<]*)/um, true) + text = md[1] + if md[0].length == 0 + @source.match( /(\s+)/, true ) +@@ -472,8 +476,7 @@ def normalize( input, entities=nil, entity_filter=nil ) + + # Unescapes all possible entities + def unnormalize( string, entities=nil, filter=nil ) +- rv = string.clone +- rv.gsub!( /\r\n?/, "\n" ) ++ rv = string.gsub( /\r\n?/, "\n" ) + matches = rv.scan( REFERENCE_RE ) + return rv if matches.size == 0 + rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { +@@ -508,9 +511,9 @@ def need_source_encoding_update?(xml_declaration_encoding) + end + + def parse_name(base_error_message) +- md = @source.match(/\A\s*#{NAME}/um, true) ++ md = @source.match(NAME_PATTERN, true) + unless md +- if @source.match(/\A\s*\S/um) ++ if @source.match(/\s*\S/um) + message = "#{base_error_message}: invalid name" + else + message = "#{base_error_message}: name is missing" +@@ -671,12 +674,29 @@ def parse_attributes(prefixes, curr_ns) + end + + def process_instruction +- match_data = @source.match(INSTRUCTION_PATTERN, true) ++ match_data = @source.match(INSTRUCTION_END, true) + unless match_data + message = "Invalid processing instruction node" ++ @source.string = " + DETAIL + end ++ ++ def test_no_name ++ exception = assert_raise(REXML::ParseException) do ++ parse(<<-DOCTYPE) ++ ++ DOCTYPE ++ end ++ assert_equal(<<-DETAIL.chomp, exception.to_s) ++Malformed DOCTYPE: name is missing ++Line: 3 ++Position: 17 ++Last 80 unconsumed characters: ++ ++ DETAIL ++ end + end + + class TestExternalID < self + +From 0496940d5998ccbc50d16fb734993ab50fc60c2d Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Mon, 18 Mar 2024 23:30:47 +0900 +Subject: [PATCH 5/7] Optimize the parse_attributes method to use + `Source#match` to parse XML. (#119) + +## Why? + +Improve maintainability by consolidating processing into `Source#match`. + +## Benchmark +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.891 10.622 16.356 17.403 i/s - 100.000 times in 9.182130s 9.414177s 6.113806s 5.746133s + sax 30.335 29.845 49.749 54.877 i/s - 100.000 times in 3.296483s 3.350595s 2.010071s 1.822259s + pull 35.514 34.801 61.123 66.908 i/s - 100.000 times in 2.815793s 2.873484s 1.636041s 1.494591s + stream 35.141 34.475 52.110 56.836 i/s - 100.000 times in 2.845646s 2.900638s 1.919017s 1.759456s + +Comparison: + dom + after(YJIT): 17.4 i/s + before(YJIT): 16.4 i/s - 1.06x slower + before: 10.9 i/s - 1.60x slower + after: 10.6 i/s - 1.64x slower + + sax + after(YJIT): 54.9 i/s + before(YJIT): 49.7 i/s - 1.10x slower + before: 30.3 i/s - 1.81x slower + after: 29.8 i/s - 1.84x slower + + pull + after(YJIT): 66.9 i/s + before(YJIT): 61.1 i/s - 1.09x slower + before: 35.5 i/s - 1.88x slower + after: 34.8 i/s - 1.92x slower + + stream + after(YJIT): 56.8 i/s + before(YJIT): 52.1 i/s - 1.09x slower + before: 35.1 i/s - 1.62x slower + after: 34.5 i/s - 1.65x slower + +``` + +- YJIT=ON : 1.06x - 1.10x faster +- YJIT=OFF : 0.97x - 0.98x faster +=== +"test_attribute_namespace_conflict" in test/rexml/test_core.rb +adjustment not applied as the commit that added the test is not present. + +Wider use of Source#match to parse XML. + +Changes the way BaseParser#parse_attribute +grabs fields that make up an XML attribute. + +This is relied upon by upstream 4325835f92f3f142ebd91a3fdba4e1f1ab7f1cfb +that actually fixes CVE-2024-35176. + +https://github.com/ruby/rexml/commit/0496940d5998ccbc50d16fb734993ab50fc60c2d +--- + lib/rexml/parsers/baseparser.rb | 113 ++++++++++++------------------- + test/rexml/parse/test_element.rb | 2 +- + test/rexml/test_core.rb | 17 ++++- + 3 files changed, 61 insertions(+), 71 deletions(-) + +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb +index dafc1b1449..45eabe8cc5 100644 +--- a/lib/rexml/parsers/baseparser.rb ++++ b/lib/rexml/parsers/baseparser.rb +@@ -123,7 +123,7 @@ class BaseParser + + module Private + INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um +- TAG_PATTERN = /((?>#{QNAME_STR}))/um ++ TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um + CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um + ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um + NAME_PATTERN = /\s*#{NAME}/um +@@ -592,85 +592,60 @@ def parse_id_invalid_details(accept_external_id:, + def parse_attributes(prefixes, curr_ns) + attributes = {} + closed = false +- match_data = @source.match(/^(.*?)(\/)?>/um, true) +- if match_data.nil? +- message = "Start tag isn't ended" +- raise REXML::ParseException.new(message, @source) +- end +- +- raw_attributes = match_data[1] +- closed = !match_data[2].nil? +- return attributes, closed if raw_attributes.nil? +- return attributes, closed if raw_attributes.empty? +- +- scanner = StringScanner.new(raw_attributes) +- until scanner.eos? +- if scanner.scan(/\s+/) +- break if scanner.eos? +- end +- +- pos = scanner.pos +- loop do +- break if scanner.scan(ATTRIBUTE_PATTERN) +- unless scanner.scan(QNAME) +- message = "Invalid attribute name: <#{scanner.rest}>" +- raise REXML::ParseException.new(message, @source) +- end +- name = scanner[0] +- unless scanner.scan(/\s*=\s*/um) ++ while true ++ if @source.match(">", true) ++ return attributes, closed ++ elsif @source.match("/>", true) ++ closed = true ++ return attributes, closed ++ elsif match = @source.match(QNAME, true) ++ name = match[1] ++ prefix = match[2] ++ local_part = match[3] ++ ++ unless @source.match(/\s*=\s*/um, true) + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +- quote = scanner.scan(/['"]/) +- unless quote +- message = "Missing attribute value start quote: <#{name}>" +- raise REXML::ParseException.new(message, @source) +- end +- unless scanner.scan(/.*#{Regexp.escape(quote)}/um) +- match_data = @source.match(/^(.*?)(\/)?>/um, true) +- if match_data +- scanner << "/" if closed +- scanner << ">" +- scanner << match_data[1] +- scanner.pos = pos +- closed = !match_data[2].nil? +- next ++ unless match = @source.match(/(['"])(.*?)\1\s*/um, true) ++ if match = @source.match(/(['"])/, true) ++ message = ++ "Missing attribute value end quote: <#{name}>: <#{match[1]}>" ++ raise REXML::ParseException.new(message, @source) ++ else ++ message = "Missing attribute value start quote: <#{name}>" ++ raise REXML::ParseException.new(message, @source) + end +- message = +- "Missing attribute value end quote: <#{name}>: <#{quote}>" +- raise REXML::ParseException.new(message, @source) + end +- end +- name = scanner[1] +- prefix = scanner[2] +- local_part = scanner[3] +- # quote = scanner[4] +- value = scanner[5] +- if prefix == "xmlns" +- if local_part == "xml" +- if value != "http://www.w3.org/XML/1998/namespace" +- msg = "The 'xml' prefix must not be bound to any other namespace "+ ++ value = match[2] ++ if prefix == "xmlns" ++ if local_part == "xml" ++ if value != "http://www.w3.org/XML/1998/namespace" ++ msg = "The 'xml' prefix must not be bound to any other namespace "+ ++ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" ++ raise REXML::ParseException.new( msg, @source, self ) ++ end ++ elsif local_part == "xmlns" ++ msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" +- raise REXML::ParseException.new( msg, @source, self ) ++ raise REXML::ParseException.new( msg, @source, self) + end +- elsif local_part == "xmlns" +- msg = "The 'xmlns' prefix must not be declared "+ +- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" +- raise REXML::ParseException.new( msg, @source, self) ++ curr_ns << local_part ++ elsif prefix ++ prefixes << prefix unless prefix == "xml" + end +- curr_ns << local_part +- elsif prefix +- prefixes << prefix unless prefix == "xml" +- end + +- if attributes.has_key?(name) +- msg = "Duplicate attribute #{name.inspect}" +- raise REXML::ParseException.new(msg, @source, self) +- end ++ if attributes.has_key?(name) ++ msg = "Duplicate attribute #{name.inspect}" ++ raise REXML::ParseException.new(msg, @source, self) ++ end + +- attributes[name] = value ++ attributes[name] = value ++ else ++ message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>" ++ raise REXML::ParseException.new(message, @source) ++ end + end +- return attributes, closed + end + + def process_instruction +diff --git a/test/rexml/parse/test_element.rb b/test/rexml/parse/test_element.rb +index e8dce4b997..987214f3bb 100644 +--- a/test/rexml/parse/test_element.rb ++++ b/test/rexml/parse/test_element.rb +@@ -43,7 +43,7 @@ def test_empty_namespace_attribute_name + Line: 1 + Position: 13 + Last 80 unconsumed characters: +- ++:a=""> + DETAIL + end + +diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb +index d4ece491b9..46de90ccd7 100644 +--- a/test/rexml/test_core.rb ++++ b/test/rexml/test_core.rb +@@ -1277,11 +1277,26 @@ def test_ticket_21 + exception = assert_raise(ParseException) do + Document.new(src) + end +- assert_equal(<<-DETAIL, exception.to_s) ++ assert_equal(<<-DETAIL.chomp, exception.to_s) + Missing attribute value start quote: + Line: 1 + Position: 16 + Last 80 unconsumed characters: ++value/> ++ DETAIL ++ end ++ ++ def test_parse_exception_on_missing_attribute_end_quote ++ src = ' ++Line: 1 ++Position: 17 ++Last 80 unconsumed characters: ++value/> + DETAIL + end + + +From 4325835f92f3f142ebd91a3fdba4e1f1ab7f1cfb Mon Sep 17 00:00:00 2001 +From: Nobuyoshi Nakada +Date: Thu, 16 May 2024 11:26:51 +0900 +Subject: [PATCH 6/7] Read quoted attributes in chunks (#126) + +=== +test/rexml/test_document.rb: +* test_gt_linear_performance +Test not included, Ruby 2.5 does not have assert_linear_performance +method available in the test suite. It was introduced in Ruby 2.7. +* Do not require 'core_assertions' +Current requires are fine for running the tests in mock build. + +The fix for CVE-2024-35176. By reading the field in chunks instead of 1 +by 1 it improves performance for large strings composed of angle +brackets. + +https://github.com/ruby/rexml/commit/4325835f92f3f142ebd91a3fdba4e1f1ab7f1cfb +--- + lib/rexml/parsers/baseparser.rb | 20 ++++++++++---------- + lib/rexml/source.rb | 29 ++++++++++++++++++++++++----- + 2 files changed, 34 insertions(+), 15 deletions(-) + +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb +index 45eabe8cc5..304835e407 100644 +--- a/lib/rexml/parsers/baseparser.rb ++++ b/lib/rexml/parsers/baseparser.rb +@@ -607,17 +607,17 @@ def parse_attributes(prefixes, curr_ns) + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +- unless match = @source.match(/(['"])(.*?)\1\s*/um, true) +- if match = @source.match(/(['"])/, true) +- message = +- "Missing attribute value end quote: <#{name}>: <#{match[1]}>" +- raise REXML::ParseException.new(message, @source) +- else +- message = "Missing attribute value start quote: <#{name}>" +- raise REXML::ParseException.new(message, @source) +- end ++ unless match = @source.match(/(['"])/, true) ++ message = "Missing attribute value start quote: <#{name}>" ++ raise REXML::ParseException.new(message, @source) ++ end ++ quote = match[1] ++ value = @source.read_until(quote) ++ unless value.chomp!(quote) ++ message = "Missing attribute value end quote: <#{name}>: <#{quote}>" ++ raise REXML::ParseException.new(message, @source) + end +- value = match[2] ++ @source.match(/\s*/um, true) + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" +diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb +index d2a6ad872e..2a3bd36695 100644 +--- a/lib/rexml/source.rb ++++ b/lib/rexml/source.rb +@@ -65,7 +65,11 @@ def encoding=(enc) + encoding_updated + end + +- def read ++ def read(term = nil) ++ end ++ ++ def read_until(term) ++ @scanner.scan_until(Regexp.union(term)) or @scanner.rest + end + + def match(pattern, cons=false) +@@ -151,9 +155,9 @@ def initialize(arg, block_size=500, encoding=nil) + end + end + +- def read ++ def read(term = nil) + begin +- @scanner << readline ++ @scanner << readline(term) + true + rescue Exception, NameError + @source = nil +@@ -161,6 +165,21 @@ def read + end + end + ++ def read_until(term) ++ pattern = Regexp.union(term) ++ data = [] ++ begin ++ until str = @scanner.scan_until(pattern) ++ @scanner << readline(term) ++ end ++ rescue EOFError ++ @scanner.rest ++ else ++ read if @scanner.eos? and !@source.eof? ++ str ++ end ++ end ++ + def match( pattern, cons=false ) + read if @scanner.eos? && @source + while true +@@ -204,8 +223,8 @@ def current_line + end + + private +- def readline +- str = @source.readline(@line_break) ++ def readline(term = nil) ++ str = @source.readline(term || @line_break) + if @pending_buffer + if str.nil? + str = @pending_buffer + +From f1df7d13b3e57a5e059273d2f0870163c08d7420 Mon Sep 17 00:00:00 2001 +From: Sutou Kouhei +Date: Mon, 20 May 2024 12:17:27 +0900 +Subject: [PATCH 7/7] Add support for old strscan + +Fix GH-132 + +If we support old strscan, users can also use strscan installed as a +default gem. + +Reported by Adam. Thanks!!! + +=== +strscan shipped with Ruby 2.5 lacks a method StringScanner#captures. +ruby/rexml#105 brought this dependency on the method. + +https://bugs.ruby-lang.org/issues/20516#note-11 +https://github.com/ruby/rexml/commit/f1df7d13b3e57a5e059273d2f0870163c08d7420 +--- + lib/rexml/parsers/baseparser.rb | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb +index 304835e407..006d8acb9b 100644 +--- a/lib/rexml/parsers/baseparser.rb ++++ b/lib/rexml/parsers/baseparser.rb +@@ -9,6 +9,17 @@ + + module REXML + module Parsers ++ if StringScanner::Version < "3.0.8" ++ module StringScannerCaptures ++ refine StringScanner do ++ def captures ++ values_at(*(1...size)) ++ end ++ end ++ end ++ using StringScannerCaptures ++ end ++ + # = Using the Pull Parser + # This API is experimental, and subject to change. + # parser = PullParser.new( "texttxet" ) diff --git a/SOURCES/rubygem-strscan-1.0.2-Accept-String-as-a-pattern.patch b/SOURCES/rubygem-strscan-1.0.2-Accept-String-as-a-pattern.patch new file mode 100644 index 0000000..59e734f --- /dev/null +++ b/SOURCES/rubygem-strscan-1.0.2-Accept-String-as-a-pattern.patch @@ -0,0 +1,264 @@ +From e56ac27d19cc3acdf6c1cb13b14224c43df5f5f6 Mon Sep 17 00:00:00 2001 +From: Kouhei Sutou +Date: Thu, 4 Apr 2019 17:52:50 +0900 +Subject: [PATCH] Accept String as a pattern + +It's only for head only match case such as StringScanner#scan. + +If we use a String as a pattern, we can improve match performance. +Here is a result of the including benchmark. It shows String as a +pattern is 1.25x faster than Regexp as a pattern. + + % rake benchmark + /tmp/local/bin/ruby -S benchmark-driver benchmark/scan.yaml + Warming up -------------------------------------- + regexp 12.094M i/s - 12.242M times in 1.012250s (82.69ns/i, 277clocks/i) + string 14.653M i/s - 14.889M times in 1.016124s (68.25ns/i, 252clocks/i) + Calculating ------------------------------------- + regexp 14.713M i/s - 36.281M times in 2.465970s (67.97ns/i, 254clocks/i) + string 18.422M i/s - 43.959M times in 2.386255s (54.28ns/i, 201clocks/i) + + Comparison: + string: 18421631.8 i/s + regexp: 14712660.7 i/s - 1.25x slower + +==== +Backport https://github.com/ruby/strscan/pull/4 for strscan. + +REXML fixes for CVE-2024-35716 depend on this feature. +--- + ext/strscan/strscan.c | 92 +++++++++++++++++++----------- + test/strscan/test_stringscanner.rb | 45 ++++++++++++++- + 2 files changed, 100 insertions(+), 37 deletions(-) + +diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c +index d6168a0d4f..43319b672e 100644 +--- a/ext/strscan/strscan.c ++++ b/ext/strscan/strscan.c +@@ -447,15 +447,18 @@ strscan_set_pos(VALUE self, VALUE v) + } + + static VALUE +-strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) ++strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly) + { +- regex_t *rb_reg_prepare_re(VALUE re, VALUE str); + struct strscanner *p; +- regex_t *re; +- long ret; +- int tmpreg; + +- Check_Type(regex, T_REGEXP); ++ if (headonly) { ++ if (!RB_TYPE_P(pattern, T_REGEXP)) { ++ StringValue(pattern); ++ } ++ } ++ else { ++ Check_Type(pattern, T_REGEXP); ++ } + GET_SCANNER(self, p); + + CLEAR_MATCH_STATUS(p); +@@ -463,37 +466,55 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) + return Qnil; + } + +- p->regex = regex; +- re = rb_reg_prepare_re(regex, p->str); +- tmpreg = re != RREGEXP_PTR(regex); +- if (!tmpreg) RREGEXP(regex)->usecnt++; ++ if (RB_TYPE_P(pattern, T_REGEXP)) { ++ regex_t *rb_reg_prepare_re(VALUE re, VALUE str); ++ regex_t *re; ++ long ret; ++ int tmpreg; ++ ++ p->regex = pattern; ++ re = rb_reg_prepare_re(pattern, p->str); ++ tmpreg = re != RREGEXP_PTR(pattern); ++ if (!tmpreg) RREGEXP(pattern)->usecnt++; ++ ++ if (headonly) { ++ ret = onig_match(re, (UChar* )CURPTR(p), ++ (UChar* )(CURPTR(p) + S_RESTLEN(p)), ++ (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE); ++ } ++ else { ++ ret = onig_search(re, ++ (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), ++ (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), ++ &(p->regs), ONIG_OPTION_NONE); ++ } ++ if (!tmpreg) RREGEXP(pattern)->usecnt--; ++ if (tmpreg) { ++ if (RREGEXP(pattern)->usecnt) { ++ onig_free(re); ++ } ++ else { ++ onig_free(RREGEXP_PTR(pattern)); ++ RREGEXP_PTR(pattern) = re; ++ } ++ } + +- if (headonly) { +- ret = onig_match(re, (UChar* )CURPTR(p), +- (UChar* )(CURPTR(p) + S_RESTLEN(p)), +- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE); ++ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); ++ if (ret < 0) { ++ /* not matched */ ++ return Qnil; ++ } + } + else { +- ret = onig_search(re, +- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), +- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), +- &(p->regs), ONIG_OPTION_NONE); +- } +- if (!tmpreg) RREGEXP(regex)->usecnt--; +- if (tmpreg) { +- if (RREGEXP(regex)->usecnt) { +- onig_free(re); ++ rb_enc_check(p->str, pattern); ++ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) { ++ return Qnil; + } +- else { +- onig_free(RREGEXP_PTR(regex)); +- RREGEXP_PTR(regex) = re; ++ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { ++ return Qnil; + } +- } +- +- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); +- if (ret < 0) { +- /* not matched */ +- return Qnil; ++ onig_region_clear(&(p->regs)); ++ onig_region_set(&(p->regs), 0, 0, RSTRING_LEN(pattern)); + } + + MATCHED(p); +@@ -520,7 +541,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) + * p s.scan(/\w+/) # -> "test" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " +- * p s.scan(/\w+/) # -> "string" ++ * p s.scan("str") # -> "str" ++ * p s.scan(/\w+/) # -> "ing" + * p s.scan(/./) # -> nil + * + */ +@@ -539,6 +561,7 @@ strscan_scan(VALUE self, VALUE re) + * s = StringScanner.new('test string') + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\w+/) # -> 4 ++ * p s.match?("test") # -> 4 + * p s.match?(/\s+/) # -> nil + */ + static VALUE +@@ -560,7 +583,8 @@ strscan_match_p(VALUE self, VALUE re) + * p s.skip(/\w+/) # -> 4 + * p s.skip(/\w+/) # -> nil + * p s.skip(/\s+/) # -> 1 +- * p s.skip(/\w+/) # -> 6 ++ * p s.skip("st") # -> 2 ++ * p s.skip(/\w+/) # -> 4 + * p s.skip(/./) # -> nil + * + */ +diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb +index 3423f9cfed..63b1ce1f9b 100644 +--- a/test/strscan/test_stringscanner.rb ++++ b/test/strscan/test_stringscanner.rb +@@ -282,6 +282,22 @@ def test_scan + assert_equal "", s.scan(//) + end + ++ def test_scan_string ++ s = StringScanner.new('stra strb strc') ++ assert_equal 'str', s.scan('str') ++ assert_equal 'str', s[0] ++ assert_equal 3, s.pos ++ assert_equal false, s.tainted? ++ assert_equal 'a ', s.scan('a ') ++ ++ str = 'stra strb strc'.dup ++ str.taint ++ s = StringScanner.new(str, false) ++ matched = s.scan('str') ++ assert_equal 'str', matched ++ assert_equal true, matched.tainted? ++ end ++ + def test_skip + s = StringScanner.new('stra strb strc', true) + assert_equal 4, s.skip(/\w+/) +@@ -367,8 +383,10 @@ def test_matched + assert_equal false, s.matched.tainted? + s.scan(/\s+/) + assert_equal ' ', s.matched ++ s.scan('st') ++ assert_equal 'st', s.matched + s.scan(/\w+/) +- assert_equal 'strb', s.matched ++ assert_equal 'rb', s.matched + s.scan(/\s+/) + assert_equal ' ', s.matched + s.scan(/\w+/) +@@ -483,7 +501,7 @@ def test_pre_match + s.skip(/\s/) + assert_equal 'a', s.pre_match + assert_equal false, s.pre_match.tainted? +- s.scan(/\w/) ++ s.scan('b') + assert_equal 'a ', s.pre_match + s.scan_until(/c/) + assert_equal 'a b ', s.pre_match +@@ -513,7 +531,7 @@ def test_post_match + assert_equal ' b c d e', s.post_match + s.skip(/\s/) + assert_equal 'b c d e', s.post_match +- s.scan(/\w/) ++ s.scan('b') + assert_equal ' c d e', s.post_match + s.scan_until(/c/) + assert_equal ' d e', s.post_match +@@ -589,6 +607,20 @@ def test_encoding + assert_equal(Encoding::EUC_JP, ss.scan(/./e).encoding) + end + ++ def test_encoding_string ++ str = "\xA1\xA2".dup.force_encoding("euc-jp") ++ ss = StringScanner.new(str) ++ assert_equal(str.dup, ss.scan(str.dup)) ++ end ++ ++ def test_invalid_encoding_string ++ str = "\xA1\xA2".dup.force_encoding("euc-jp") ++ ss = StringScanner.new(str) ++ assert_raise(Encoding::CompatibilityError) do ++ ss.scan(str.encode("UTF-8")) ++ end ++ end ++ + def test_generic_regexp + ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp")) + t = ss.scan(/./) +@@ -643,6 +675,13 @@ def test_exist_p + assert_equal(nil, s.exist?(/e/)) + end + ++ def test_exist_p_string ++ s = StringScanner.new("test string") ++ assert_raise(TypeError) do ++ s.exist?(" ") ++ end ++ end ++ + def test_skip_until + s = StringScanner.new("Foo Bar Baz") + assert_equal(3, s.skip_until(/Foo/)) diff --git a/SPECS/ruby.spec b/SPECS/ruby.spec index e7eab3a..2a8e970 100644 --- a/SPECS/ruby.spec +++ b/SPECS/ruby.spec @@ -21,7 +21,7 @@ %endif -%global release 111 +%global release 112 %{!?release_string:%global release_string %{?development_release:0.}%{release}%{?development_release:.%{development_release}}%{?dist}} @@ -230,6 +230,42 @@ Patch41: ruby-2.7.8-Fix-CVE-2023-28755-ReDos-vulnerability-in-URI.patch # https://github.com/ruby/ruby/commit/2cb830602f52e7e76c6781115e7938b21f881c4f # https://github.com/ruby/ruby/commit/e3f18f7d2e034f20053d7bf2fc7a50f8b7e1a27a Patch42: ruby-2.7.8-Fix-CVE-2023-28756-ReDoS-vulnerability-in-Time.patch +# Fix net-http test errors due to expired certificate +# https://github.com/ruby/ruby/commit/d3933fc753187a055a4904af82f5f3794c88c416 +# https://bugs.ruby-lang.org/issues/20106 +Patch43: ruby-3.4.0-ruby-net-http-Renew-test-certificates.patch +# CVE-2023-36617 ReDoS vulnerability - upstream's incomplete fix +# for CVE-2023-28755. +# Tests not included as assert_linear_time was introduced in Ruby 2.7. +# https://github.com/ruby/ruby/commit/616926b55e306a0704254a7ddfd6e9834d06c7f2 +Patch44: ruby-3.0.7-Fix-CVE-2023-36617-Upstreams-incomplete-fix-for-CVE-2023-28755.patch +# CVE-2024-27280 Buffer overread vulnerability in StringIO. +# Backported from: +# https://github.com/ruby/ruby/commit/bd9424c71c15896a997d5a092bf5e1ed453defa6 +Patch45: ruby-3.0.7-Fix-CVE-2024-27280-Buffer-overread-in-StringIO.patch +# CVE-2024-27281 RCE vulnerability with .rdoc_options in RDoc. +# Backported from: +# https://github.com/ruby/ruby/commit/7957a25edf844c966de45848fa7e9e2513955660 +Patch46: ruby-3.0.7-Fix-CVE-2024-27281-RCE-vulnerability-with-rdoc_options.patch +# CVE-2024-27282 Arbitrary memory address read vulnerability with Regex search. +# Backported from: +# https://github.com/ruby/ruby/commit/6c6dca749d3f732b7be04bae20095a040c50fdb8 +Patch47: ruby-3.0.7-Fix-CVE-2024-27282-Memory-address-read-with-Regex.patch +# Fix for REXML CVE-2024-35176 depends on being able to pass a string to the +# scan method in addition to a regex. +# https://github.com/ruby/strscan/pull/4 +Patch48: rubygem-strscan-1.0.2-Accept-String-as-a-pattern.patch +# CVE-2024-35176 REXML: DoS parsing an XML with many `<`s in an attribute value. +# The actual fix for the CVE is https://github.com/ruby/rexml/pull/126 , +# but that PR is depending on the content of a few previous PRs and commits. +# https://github.com/ruby/rexml/commit/694239f0855668c986feba6f1b395ecd94a1f0bc +# https://github.com/ruby/rexml/commit/810d2285235d5501a0a124f300832e6e9515da3c +# https://github.com/ruby/rexml/commit/77128555476cb0db798e2912fb3a07d6411dc320 +# https://github.com/ruby/rexml/commit/370666e314816b57ecd5878e757224c3b6bc93f5 +# https://github.com/ruby/rexml/commit/0496940d5998ccbc50d16fb734993ab50fc60c2d +# https://github.com/ruby/rexml/commit/4325835f92f3f142ebd91a3fdba4e1f1ab7f1cfb +# https://github.com/ruby/rexml/commit/f1df7d13b3e57a5e059273d2f0870163c08d7420 +Patch49: rubygem-rexml-3.2.9-Fix-CVE-2024-35176-DoS-in-REXML.patch Requires: %{name}-libs%{?_isa} = %{version}-%{release} @@ -643,6 +679,13 @@ sed -i 's/"evaluation\/incorrect_words.yaml"\.freeze, //' \ %patch40 -p1 %patch41 -p1 %patch42 -p1 +%patch43 -p1 +%patch44 -p1 +%patch45 -p1 +%patch46 -p1 +%patch47 -p1 +%patch48 -p1 +%patch49 -p1 # Provide an example of usage of the tapset: cp -a %{SOURCE3} . @@ -868,6 +911,18 @@ DISABLE_TESTS="$DISABLE_TESTS -n !/test_segv_\(setproctitle\|test\|loaded_featur # https://bugs.ruby-lang.org/issues/14175 sed -i '/def test_mdns_each_address$/,/^ end$/ s/^/#/' test/resolv/test_mdns.rb +# Provide workaround for s390x zlib errors in tests on RHEL 8. +# The library works, but there seems to be chip acceleration used +# resulting in failing asserts against hardcoded values that were +# created for a different algorithm than is the default on that platform. +# Fix taken from: +# https://github.com/ruby/zlib/commit/280a1b5905d752af25e9f09e6511388cb4260eb2 +%ifarch s390x + # Force chip acceleration off. + # https://www.ibm.com/docs/en/linux-on-systems?topic=o-applications-2 + export DFLTCC=0 +%endif + # For now, disable test incompatible with OpenSSL 1.1.1: # https://github.com/rubygems/rubygems/issues/2388 DISABLE_TESTS="$DISABLE_TESTS -n !/test_do_not_allow_invalid_client_cert_auth_connection/" @@ -1195,6 +1250,23 @@ OPENSSL_SYSTEM_CIPHERS_OVERRIDE=xyz_nonexistent_file OPENSSL_CONF='' \ %{gem_dir}/specifications/xmlrpc-%{xmlrpc_version}.gemspec %changelog +* Tue May 21 2024 Jarek Prokop - 2.5.9-112 +- Fix ReDoS vulnerability - upstream's incomplete fix for CVE-2023-28755. + (CVE-2023-36617) + Resolves: RHEL-5614 +- Fix Buffer overread vulnerability in StringIO. + (CVE-2024-27280) + Resolves: RHEL-34125 +- Fix RCE vulnerability with .rdoc_options in RDoc. + (CVE-2024-27281) + Resolves: RHEL-34117 +- Fix Arbitrary memory address read vulnerability with Regex search. + (CVE-2024-27282) + Resolves: RHEL-33867 +- Fix REXML DoS parsing an XML with many `<`s in an attribute value. + (CVE-2024-35176) + Resolves: RHEL-37877 + * Tue Dec 12 2023 MSVSphere Packaging Team - 2.5.9-111 - Rebuilt for MSVSphere 8.8