Resolves: rhbz#1382401 broken export of emojis to HTML

f41
Stephan Bergmann 8 years ago
parent a4d8ed3aa8
commit 602fff2940

@@ -0,0 +1,132 @@
From a8a0334dfc94432f22f0e1452dfaf2dabf2fe780 Mon Sep 17 00:00:00 2001
From: Stephan Bergmann <sbergman@redhat.com>
Date: Fri, 7 Oct 2016 15:02:47 +0200
Subject: [PATCH] rhbz#1382401: Support surrogate pairs in HTMLOutFuncs
(cherry picked from commit 375b99cad4a79d26a6cbcd0f71bc12b312d95818)
Conflicts:
svtools/source/svhtml/htmlout.cxx
plus cherry-pick of 6131bf9c96fb2ae37decf13e453f27304707271b "Minor performance
improvement of previous patch"
Change-Id: Ib578f758e4f5f355a79a014c2ad4660924dd34a4
---
include/svtools/htmlout.hxx | 2 +-
svtools/source/svhtml/htmlout.cxx | 40 +++++++++++++++++++++++++++------------
2 files changed, 29 insertions(+), 13 deletions(-)
diff --git a/include/svtools/htmlout.hxx b/include/svtools/htmlout.hxx
index 715f0ba..e8a5d50 100644
--- a/include/svtools/htmlout.hxx
+++ b/include/svtools/htmlout.hxx
@@ -60,7 +60,7 @@ struct HTMLOutFuncs
SVT_DLLPUBLIC static SvStream& Out_AsciiTag( SvStream&, const sal_Char* pStr,
bool bOn = true,
rtl_TextEncoding eDestEnc = RTL_TEXTENCODING_MS_1252);
- SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_Unicode cChar,
+ SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_uInt32 cChar,
HTMLOutContext& rContext,
OUString *pNonConvertableChars = nullptr );
SVT_DLLPUBLIC static SvStream& Out_String( SvStream&, const OUString&,
diff --git a/svtools/source/svhtml/htmlout.cxx b/svtools/source/svhtml/htmlout.cxx
index 99d9e38..4305338 100644
--- a/svtools/source/svhtml/htmlout.cxx
+++ b/svtools/source/svhtml/htmlout.cxx
@@ -55,7 +55,7 @@ HTMLOutContext::~HTMLOutContext()
rtl_destroyUnicodeToTextConverter( m_hConv );
}
-static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c,
+static const sal_Char *lcl_svhtml_GetEntityForChar( sal_uInt32 c,
rtl_TextEncoding eDestEnc )
{
const sal_Char* pStr = nullptr;
@@ -388,10 +388,12 @@ static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c,
return pStr;
}
-static OString lcl_ConvertCharToHTML( sal_Unicode c,
+static OString lcl_ConvertCharToHTML( sal_uInt32 c,
HTMLOutContext& rContext,
OUString *pNonConvertableChars )
{
+ assert(rtl::isUnicodeCodePoint(c));
+
OStringBuffer aDest;
DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW != rContext.m_eDestEnc,
"wrong destination encoding" );
@@ -439,8 +441,18 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
}
else
{
+ sal_Unicode utf16[2];
+ sal_Size n;
+ if (c < 0x10000) {
+ utf16[0] = c;
+ n = 1;
+ } else {
+ utf16[0] = rtl::getHighSurrogate(c);
+ utf16[1] = rtl::getLowSurrogate(c);
+ n = 2;
+ }
sal_Size nLen = rtl_convertUnicodeToText( rContext.m_hConv,
- rContext.m_hContext, &c, 1,
+ rContext.m_hContext, utf16, n,
cBuffer, TXTCONV_BUFFER_SIZE,
nFlags,
&nInfo, &nSrcChars );
@@ -466,11 +478,15 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
while( nLen-- )
aDest.append(*pBuffer++);
- aDest.append('&').append('#').append(static_cast<sal_Int64>(c))
+ aDest.append('&').append('#').append(static_cast<sal_Int32>(c))
+ // Unicode code points guaranteed to fit into sal_Int32
.append(';');
- if( pNonConvertableChars &&
- -1 == pNonConvertableChars->indexOf( c ) )
- (*pNonConvertableChars) += OUString(c);
+ if( pNonConvertableChars )
+ {
+ OUString cs(&c, 1);
+ if( -1 == pNonConvertableChars->indexOf( cs ) )
+ (*pNonConvertableChars) += cs;
+ }
}
}
return aDest.makeStringAndClear();
@@ -505,9 +521,9 @@ OString HTMLOutFuncs::ConvertStringToHTML( const OUString& rSrc,
{
HTMLOutContext aContext( eDestEnc );
OStringBuffer aDest;
- for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; i++ )
+ for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; )
aDest.append(lcl_ConvertCharToHTML(
- rSrc[i], aContext, pNonConvertableChars));
+ rSrc.iterateCodePoints(&i), aContext, pNonConvertableChars));
aDest.append(lcl_FlushToAscii(aContext));
return aDest.makeStringAndClear();
}
@@ -525,7 +541,7 @@ SvStream& HTMLOutFuncs::Out_AsciiTag( SvStream& rStream, const sal_Char *pStr,
return rStream;
}
-SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_Unicode c,
+SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_uInt32 c,
HTMLOutContext& rContext,
OUString *pNonConvertableChars )
{
@@ -540,8 +556,8 @@ SvStream& HTMLOutFuncs::Out_String( SvStream& rStream, const OUString& rOUStr,
{
HTMLOutContext aContext( eDestEnc );
sal_Int32 nLen = rOUStr.getLength();
- for( sal_Int32 n = 0; n < nLen; n++ )
- HTMLOutFuncs::Out_Char( rStream, rOUStr[n],
+ for( sal_Int32 n = 0; n < nLen; )
+ HTMLOutFuncs::Out_Char( rStream, rOUStr.iterateCodePoints(&n),
aContext, pNonConvertableChars );
HTMLOutFuncs::FlushToAscii( rStream, aContext );
return rStream;
--
2.7.4

@@ -55,7 +55,7 @@ Summary: Free Software Productivity Suite
Name: libreoffice
Epoch: 1
Version: %{libo_version}.2
-Release: 2%{?libo_prerelease}%{?dist}
+Release: 3%{?libo_prerelease}%{?dist}
License: (MPLv1.1 or LGPLv3+) and LGPLv3 and LGPLv2+ and BSD and (MPLv1.1 or GPLv2 or LGPLv2 or Netscape) and Public Domain and ASL 2.0 and Artistic and MPLv2.0 and CC0
URL: http://www.libreoffice.org/
@@ -254,6 +254,7 @@ Patch19: 0001-fix-build-on-32-bit-ARM-on-latest-Rawhide.patch
Patch20: 0001-only-date-autofilter-menus-need-the-space-for-the-tr.patch
Patch21: 0001-rhbz-1353069-don-t-record-undo-information-in-the-cl.patch
Patch22: 0001-Resolves-tdf-101711-problems-with-attempt-to-remove-.patch
+Patch23: 0001-rhbz-1382401-Support-surrogate-pairs-in-HTMLOutFuncs.patch
%if 0%{?fedora} >= 26
Patch400: 0001-Switch-from-orcus-0.11-to-orcus-0.12.patch
@@ -2316,6 +2317,9 @@ done
%endif
%changelog
+* Fri Oct 07 2016 Stephan Bergmann <sbergman@redhat.com> - 1:5.2.2.2-3
+- Resolves: rhbz#1382401 broken export of emojis to HTML
* Thu Sep 29 2016 David Tardon <dtardon@redhat.com> - 1:5.2.2.2-2
- rebuild for liborcus 0.12

Loading…
Cancel
Save