You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
151 lines
6.6 KiB
151 lines
6.6 KiB
From 463ae6186be38c6c4290b260f86b1e2044e233f0 Mon Sep 17 00:00:00 2001
|
|
From: Stephan Bergmann <sbergman@redhat.com>
|
|
Date: Fri, 7 Oct 2016 15:02:47 +0200
|
|
Subject: [PATCH] rhbz#1382401: Support surrogate pairs in HTMLOutFuncs
|
|
|
|
(cherry picked from commit 375b99cad4a79d26a6cbcd0f71bc12b312d95818)
|
|
Conflicts:
|
|
svtools/source/svhtml/htmlout.cxx
|
|
plus cherry-pick of 6131bf9c96fb2ae37decf13e453f27304707271b "Minor performance
|
|
improvement of previous patch"
|
|
|
|
Change-Id: Ib578f758e4f5f355a79a014c2ad4660924dd34a4
|
|
---
|
|
include/svtools/htmlout.hxx | 2 +-
|
|
svtools/source/svhtml/htmlout.cxx | 44 ++++++++++++++++++++++++++-------------
|
|
2 files changed, 31 insertions(+), 15 deletions(-)
|
|
|
|
diff --git a/include/svtools/htmlout.hxx b/include/svtools/htmlout.hxx
|
|
index 715f0ba..e8a5d50 100644
|
|
--- a/include/svtools/htmlout.hxx
|
|
+++ b/include/svtools/htmlout.hxx
|
|
@@ -60,7 +60,7 @@ struct HTMLOutFuncs
|
|
SVT_DLLPUBLIC static SvStream& Out_AsciiTag( SvStream&, const sal_Char* pStr,
|
|
bool bOn = true,
|
|
rtl_TextEncoding eDestEnc = RTL_TEXTENCODING_MS_1252);
|
|
- SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_Unicode cChar,
|
|
+ SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_uInt32 cChar,
|
|
HTMLOutContext& rContext,
|
|
OUString *pNonConvertableChars = nullptr );
|
|
SVT_DLLPUBLIC static SvStream& Out_String( SvStream&, const OUString&,
|
|
diff --git a/svtools/source/svhtml/htmlout.cxx b/svtools/source/svhtml/htmlout.cxx
|
|
index 99d9e38..13ff6d8 100644
|
|
--- a/svtools/source/svhtml/htmlout.cxx
|
|
+++ b/svtools/source/svhtml/htmlout.cxx
|
|
@@ -55,7 +55,7 @@ HTMLOutContext::~HTMLOutContext()
|
|
rtl_destroyUnicodeToTextConverter( m_hConv );
|
|
}
|
|
|
|
-static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c,
|
|
+static const sal_Char *lcl_svhtml_GetEntityForChar( sal_uInt32 c,
|
|
rtl_TextEncoding eDestEnc )
|
|
{
|
|
const sal_Char* pStr = nullptr;
|
|
@@ -388,10 +388,12 @@ static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c,
|
|
return pStr;
|
|
}
|
|
|
|
-static OString lcl_ConvertCharToHTML( sal_Unicode c,
|
|
+static OString lcl_ConvertCharToHTML( sal_uInt32 c,
|
|
HTMLOutContext& rContext,
|
|
OUString *pNonConvertableChars )
|
|
{
|
|
+ assert(rtl::isUnicodeCodePoint(c));
|
|
+
|
|
OStringBuffer aDest;
|
|
DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW != rContext.m_eDestEnc,
|
|
"wrong destination encoding" );
|
|
@@ -427,7 +429,7 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
|
|
if( pStr )
|
|
{
|
|
sal_Size nLen = rtl_convertUnicodeToText(
|
|
- rContext.m_hConv, rContext.m_hContext, &c, 0,
|
|
+ rContext.m_hConv, rContext.m_hContext, nullptr, 0,
|
|
cBuffer, TXTCONV_BUFFER_SIZE,
|
|
nFlags|RTL_UNICODETOTEXT_FLAGS_FLUSH,
|
|
&nInfo, &nSrcChars );
|
|
@@ -439,8 +441,18 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
|
|
}
|
|
else
|
|
{
|
|
+ sal_Unicode utf16[2];
|
|
+ sal_Size n;
|
|
+ if (c < 0x10000) {
|
|
+ utf16[0] = c;
|
|
+ n = 1;
|
|
+ } else {
|
|
+ utf16[0] = rtl::getHighSurrogate(c);
|
|
+ utf16[1] = rtl::getLowSurrogate(c);
|
|
+ n = 2;
|
|
+ }
|
|
sal_Size nLen = rtl_convertUnicodeToText( rContext.m_hConv,
|
|
- rContext.m_hContext, &c, 1,
|
|
+ rContext.m_hContext, utf16, n,
|
|
cBuffer, TXTCONV_BUFFER_SIZE,
|
|
nFlags,
|
|
&nInfo, &nSrcChars );
|
|
@@ -457,7 +469,7 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
|
|
// entity.
|
|
// coverity[callee_ptr_arith]
|
|
nLen = rtl_convertUnicodeToText(
|
|
- rContext.m_hConv, rContext.m_hContext, &c, 0,
|
|
+ rContext.m_hConv, rContext.m_hContext, nullptr, 0,
|
|
cBuffer, TXTCONV_BUFFER_SIZE,
|
|
nFlags|RTL_UNICODETOTEXT_FLAGS_FLUSH,
|
|
&nInfo, &nSrcChars );
|
|
@@ -466,11 +478,15 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
|
|
while( nLen-- )
|
|
aDest.append(*pBuffer++);
|
|
|
|
- aDest.append('&').append('#').append(static_cast<sal_Int64>(c))
|
|
+ aDest.append('&').append('#').append(static_cast<sal_Int32>(c))
|
|
+ // Unicode code points guaranteed to fit into sal_Int32
|
|
.append(';');
|
|
- if( pNonConvertableChars &&
|
|
- -1 == pNonConvertableChars->indexOf( c ) )
|
|
- (*pNonConvertableChars) += OUString(c);
|
|
+ if( pNonConvertableChars )
|
|
+ {
|
|
+ OUString cs(&c, 1);
|
|
+ if( -1 == pNonConvertableChars->indexOf( cs ) )
|
|
+ (*pNonConvertableChars) += cs;
|
|
+ }
|
|
}
|
|
}
|
|
return aDest.makeStringAndClear();
|
|
@@ -505,9 +521,9 @@ OString HTMLOutFuncs::ConvertStringToHTML( const OUString& rSrc,
|
|
{
|
|
HTMLOutContext aContext( eDestEnc );
|
|
OStringBuffer aDest;
|
|
- for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; i++ )
|
|
+ for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; )
|
|
aDest.append(lcl_ConvertCharToHTML(
|
|
- rSrc[i], aContext, pNonConvertableChars));
|
|
+ rSrc.iterateCodePoints(&i), aContext, pNonConvertableChars));
|
|
aDest.append(lcl_FlushToAscii(aContext));
|
|
return aDest.makeStringAndClear();
|
|
}
|
|
@@ -525,7 +541,7 @@ SvStream& HTMLOutFuncs::Out_AsciiTag( SvStream& rStream, const sal_Char *pStr,
|
|
return rStream;
|
|
}
|
|
|
|
-SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_Unicode c,
|
|
+SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_uInt32 c,
|
|
HTMLOutContext& rContext,
|
|
OUString *pNonConvertableChars )
|
|
{
|
|
@@ -540,8 +556,8 @@ SvStream& HTMLOutFuncs::Out_String( SvStream& rStream, const OUString& rOUStr,
|
|
{
|
|
HTMLOutContext aContext( eDestEnc );
|
|
sal_Int32 nLen = rOUStr.getLength();
|
|
- for( sal_Int32 n = 0; n < nLen; n++ )
|
|
- HTMLOutFuncs::Out_Char( rStream, rOUStr[n],
|
|
+ for( sal_Int32 n = 0; n < nLen; )
|
|
+ HTMLOutFuncs::Out_Char( rStream, rOUStr.iterateCodePoints(&n),
|
|
aContext, pNonConvertableChars );
|
|
HTMLOutFuncs::FlushToAscii( rStream, aContext );
|
|
return rStream;
|
|
--
|
|
2.7.4
|
|
|