From a8a0334dfc94432f22f0e1452dfaf2dabf2fe780 Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Fri, 7 Oct 2016 15:02:47 +0200 Subject: [PATCH] rhbz#1382401: Support surrogate pairs in HTMLOutFuncs (cherry picked from commit 375b99cad4a79d26a6cbcd0f71bc12b312d95818) Conflicts: svtools/source/svhtml/htmlout.cxx plus cherry-pick of 6131bf9c96fb2ae37decf13e453f27304707271b "Minor performance improvement of previous patch" Change-Id: Ib578f758e4f5f355a79a014c2ad4660924dd34a4 --- include/svtools/htmlout.hxx | 2 +- svtools/source/svhtml/htmlout.cxx | 40 +++++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/svtools/htmlout.hxx b/include/svtools/htmlout.hxx index 715f0ba..e8a5d50 100644 --- a/include/svtools/htmlout.hxx +++ b/include/svtools/htmlout.hxx @@ -60,7 +60,7 @@ struct HTMLOutFuncs SVT_DLLPUBLIC static SvStream& Out_AsciiTag( SvStream&, const sal_Char* pStr, bool bOn = true, rtl_TextEncoding eDestEnc = RTL_TEXTENCODING_MS_1252); - SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_Unicode cChar, + SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_uInt32 cChar, HTMLOutContext& rContext, OUString *pNonConvertableChars = nullptr ); SVT_DLLPUBLIC static SvStream& Out_String( SvStream&, const OUString&, diff --git a/svtools/source/svhtml/htmlout.cxx b/svtools/source/svhtml/htmlout.cxx index 99d9e38..4305338 100644 --- a/svtools/source/svhtml/htmlout.cxx +++ b/svtools/source/svhtml/htmlout.cxx @@ -55,7 +55,7 @@ HTMLOutContext::~HTMLOutContext() rtl_destroyUnicodeToTextConverter( m_hConv ); } -static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c, +static const sal_Char *lcl_svhtml_GetEntityForChar( sal_uInt32 c, rtl_TextEncoding eDestEnc ) { const sal_Char* pStr = nullptr; @@ -388,10 +388,12 @@ static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c, return pStr; } -static OString lcl_ConvertCharToHTML( sal_Unicode c, +static OString lcl_ConvertCharToHTML( sal_uInt32 c, HTMLOutContext& rContext, OUString *pNonConvertableChars ) { + assert(rtl::isUnicodeCodePoint(c)); + OStringBuffer aDest; DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW != rContext.m_eDestEnc, "wrong destination encoding" ); @@ -439,8 +441,18 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c, } else { + sal_Unicode utf16[2]; + sal_Size n; + if (c < 0x10000) { + utf16[0] = c; + n = 1; + } else { + utf16[0] = rtl::getHighSurrogate(c); + utf16[1] = rtl::getLowSurrogate(c); + n = 2; + } sal_Size nLen = rtl_convertUnicodeToText( rContext.m_hConv, - rContext.m_hContext, &c, 1, + rContext.m_hContext, utf16, n, cBuffer, TXTCONV_BUFFER_SIZE, nFlags, &nInfo, &nSrcChars ); @@ -466,11 +478,15 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c, while( nLen-- ) aDest.append(*pBuffer++); - aDest.append('&').append('#').append(static_cast(c)) + aDest.append('&').append('#').append(static_cast(c)) + // Unicode code points guaranteed to fit into sal_Int32 .append(';'); - if( pNonConvertableChars && - -1 == pNonConvertableChars->indexOf( c ) ) - (*pNonConvertableChars) += OUString(c); + if( pNonConvertableChars ) + { + OUString cs(&c, 1); + if( -1 == pNonConvertableChars->indexOf( cs ) ) + (*pNonConvertableChars) += cs; + } } } return aDest.makeStringAndClear(); @@ -505,9 +521,9 @@ OString HTMLOutFuncs::ConvertStringToHTML( const OUString& rSrc, { HTMLOutContext aContext( eDestEnc ); OStringBuffer aDest; - for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; i++ ) + for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; ) aDest.append(lcl_ConvertCharToHTML( - rSrc[i], aContext, pNonConvertableChars)); + rSrc.iterateCodePoints(&i), aContext, pNonConvertableChars)); aDest.append(lcl_FlushToAscii(aContext)); return aDest.makeStringAndClear(); } @@ -525,7 +541,7 @@ SvStream& HTMLOutFuncs::Out_AsciiTag( SvStream& rStream, const sal_Char *pStr, return rStream; } -SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_Unicode c, +SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_uInt32 c, HTMLOutContext& rContext, OUString *pNonConvertableChars ) { @@ -540,8 +556,8 @@ SvStream& HTMLOutFuncs::Out_String( SvStream& rStream, const OUString& rOUStr, { HTMLOutContext aContext( eDestEnc ); sal_Int32 nLen = rOUStr.getLength(); - for( sal_Int32 n = 0; n < nLen; n++ ) - HTMLOutFuncs::Out_Char( rStream, rOUStr[n], + for( sal_Int32 n = 0; n < nLen; ) + HTMLOutFuncs::Out_Char( rStream, rOUStr.iterateCodePoints(&n), aContext, pNonConvertableChars ); HTMLOutFuncs::FlushToAscii( rStream, aContext ); return rStream; -- 2.7.4