From ce59f2eb1aeb371fe1643414f06618dbe031979f Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 24 Oct 2024 14:45:31 +0900 Subject: [PATCH] parser: fix a bug that &#0x...; is accepted as a character reference --- lib/rexml/parsers/baseparser.rb | 10 +++++++--- test/parse/test_character_reference.rb | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 7bd8adf..b4547ba 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -469,8 +469,12 @@ def unnormalize( string, entities=nil, filter=nil ) return rv if matches.size == 0 - rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { + rv.gsub!( /&#((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') + if m.start_with?("x") + code_point = Integer(m[1..-1], 16) + else + code_point = Integer(m, 10) + end + [code_point].pack('U*') } matches.collect!{|x|x[0]}.compact! if matches.size > 0