diff -rupN podofo-0.9.6/src/doc/PdfPagesTree.cpp podofo-0.9.6-new/src/doc/PdfPagesTree.cpp --- podofo-0.9.6/src/doc/PdfPagesTree.cpp 2018-02-25 23:36:48.000000000 +0100 +++ podofo-0.9.6-new/src/doc/PdfPagesTree.cpp 2018-12-19 22:42:36.898106630 +0100 @@ -51,7 +51,7 @@ PdfPagesTree::PdfPagesTree( PdfVecObject : PdfElement( "Pages", pParent ), m_cache( 0 ) { - GetObject()->GetDictionary().AddKey( "Kids", PdfArray() ); // kids->Reference() + GetObject()->GetDictionary().AddKey( "Kids", PdfArray() ); // kids->Reference() GetObject()->GetDictionary().AddKey( "Count", PdfObject( static_cast(PODOFO_LL_LITERAL(0)) ) ); } @@ -59,13 +59,13 @@ PdfPagesTree::PdfPagesTree( PdfObject* p : PdfElement( "Pages", pPagesRoot ), m_cache( GetChildCount( pPagesRoot ) ) { - if( !this->GetObject() ) + if( !this->GetObject() ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } } -PdfPagesTree::~PdfPagesTree() +PdfPagesTree::~PdfPagesTree() { m_cache.ClearCache(); } @@ -90,7 +90,7 @@ PdfPage* PdfPagesTree::GetPage( int nInd // Not in cache -> search tree PdfObjectList lstParents; PdfObject* pObj = this->GetPageNode(nIndex, this->GetRoot(), lstParents); - if( pObj ) + if( pObj ) { pPage = new PdfPage( pObj, lstParents ); m_cache.AddPageObject( nIndex, pPage ); @@ -105,13 +105,13 @@ PdfPage* PdfPagesTree::GetPage( const Pd // We have to search through all pages, // as this is the only way // to instantiate the PdfPage with a correct list of parents - for( int i=0;iGetTotalNumberOfPages();i++ ) + for( int i=0;iGetTotalNumberOfPages();i++ ) { PdfPage* pPage = this->GetPage( i ); - if( pPage && pPage->GetObject()->Reference() == ref ) + if( pPage && pPage->GetObject()->Reference() == ref ) return pPage; } - + return NULL; } @@ -130,7 +130,7 @@ void PdfPagesTree::InsertPage( int nAfte bInsertBefore = true; nAfterPageIndex = 0; } - else if( nAfterPageIndex < 0 ) + else if( nAfterPageIndex < 0 ) { // Only ePdfPageInsertionPoint_InsertBeforeFirstPage is valid here PdfError::LogMessage( 
eLogSeverity_Information, @@ -149,9 +149,9 @@ void PdfPagesTree::InsertPage( int nAfte lstParents ); } //printf("pPageBefore=%p lstParents=%i\n", pPageBefore,lstParents.size() ); - if( !pPageBefore || lstParents.size() == 0 ) + if( !pPageBefore || lstParents.size() == 0 ) { - if( this->GetTotalNumberOfPages() != 0 ) + if( this->GetTotalNumberOfPages() != 0 ) { PdfError::LogMessage( eLogSeverity_Critical, "Cannot find page %i or page %i has no parents. Cannot insert new page.", @@ -188,7 +188,7 @@ void PdfPagesTree::InsertPages( int nAft bInsertBefore = true; nAfterPageIndex = 0; } - else if( nAfterPageIndex < 0 ) + else if( nAfterPageIndex < 0 ) { // Only ePdfPageInsertionPoint_InsertBeforeFirstPage is valid here PdfError::LogMessage( eLogSeverity_Information, @@ -204,9 +204,9 @@ void PdfPagesTree::InsertPages( int nAft pPageBefore = this->GetPageNode( nAfterPageIndex, this->GetRoot(), lstParents ); } - if( !pPageBefore || lstParents.size() == 0 ) + if( !pPageBefore || lstParents.size() == 0 ) { - if( this->GetTotalNumberOfPages() != 0 ) + if( this->GetTotalNumberOfPages() != 0 ) { PdfError::LogMessage( eLogSeverity_Critical, "Cannot find page %i or page %i has no parents. 
Cannot insert new page.", @@ -239,7 +239,7 @@ PdfPage* PdfPagesTree::CreatePage( const InsertPage( this->GetTotalNumberOfPages() - 1, pPage ); m_cache.AddPageObject( this->GetTotalNumberOfPages(), pPage ); - + return pPage; } @@ -278,12 +278,12 @@ void PdfPagesTree::DeletePage( int nPage { // Delete from cache m_cache.DeletePage( nPageNumber ); - + // Delete from pages tree PdfObjectList lstParents; PdfObject* pPageNode = this->GetPageNode( nPageNumber, this->GetRoot(), lstParents ); - if( !pPageNode ) + if( !pPageNode ) { PdfError::LogMessage( eLogSeverity_Information, "Invalid argument to PdfPagesTree::DeletePage: %i - Page not found\n", @@ -291,11 +291,11 @@ void PdfPagesTree::DeletePage( int nPage PODOFO_RAISE_ERROR( ePdfError_PageNotFound ); } - if( lstParents.size() > 0 ) + if( lstParents.size() > 0 ) { PdfObject* pParent = lstParents.back(); int nKidsIndex = this->GetPosInKids( pPageNode, pParent ); - + DeletePageFromNode( pParent, lstParents, nKidsIndex, pPageNode ); } else @@ -312,10 +312,10 @@ void PdfPagesTree::DeletePage( int nPage // Private methods //////////////////////////////////////////////////// -PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pParent, - PdfObjectList & rLstParents ) +PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pParent, + PdfObjectList & rLstParents ) { - if( !pParent ) + if( !pParent ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } @@ -325,21 +325,20 @@ PdfObject* PdfPagesTree::GetPageNode( in PODOFO_RAISE_ERROR( ePdfError_InvalidKey ); } - + const PdfObject* pObj = pParent->GetIndirectKey( "Kids" ); if( pObj == NULL || !pObj->IsArray() ) { PODOFO_RAISE_ERROR( ePdfError_InvalidDataType ); } - const PdfArray & rKidsArray = pObj->GetArray(); + const PdfArray & rKidsArray = pObj->GetArray(); PdfArray::const_iterator it = rKidsArray.begin(); - const size_t numDirectKids = rKidsArray.size(); const size_t numKids = GetChildCount(pParent); // use <= since nPageNum is 0-based - if( 
static_cast(numKids) <= nPageNum ) + if( static_cast(numKids) <= nPageNum ) { PdfError::LogMessage( eLogSeverity_Critical, "Cannot retrieve page %i from a document with only %i pages.", @@ -347,92 +346,71 @@ PdfObject* PdfPagesTree::GetPageNode( in return NULL; } - //printf("Fetching: %i %i %i\n", numDirectKids, numKids, nPageNum ); - if( numDirectKids == numKids && static_cast(nPageNum) < numDirectKids ) - { - // This node has only page nodes as kids, - // so we can access the array directly - rLstParents.push_back( pParent ); - return GetPageNodeFromArray( nPageNum, rKidsArray, rLstParents ); - } - else - { - // We have to traverse the tree - while( it != rKidsArray.end() ) - { - if( (*it).IsArray() ) - { // Fixes PDFs broken by having trees with arrays nested once - - rLstParents.push_back( pParent ); - - // the following code is to find the reference to log this with - const PdfReference & rIterArrayRef = (*it).Reference(); - PdfReference refToLog; - bool isDirectObject // don't worry about 0-num. indirect ones - = ( !(rIterArrayRef.ObjectNumber() ) ); - if ( isDirectObject ) - { - if ( !(pObj->Reference().ObjectNumber() ) ) // rKidsArray's - { - refToLog = pParent->Reference(); - } - else - { - refToLog = pObj->Reference(); - } - } - else - { - refToLog = rIterArrayRef; - } - PdfError::LogMessage( eLogSeverity_Error, - "Entry in Kids array is itself an array" - "%s reference: %s\n", isDirectObject ? " (direct object)" - ", in object with" : ",", refToLog.ToString().c_str() ); + //printf("Fetching: %i %i\n", numKids, nPageNum ); - const PdfArray & rIterArray = (*it).GetArray(); + // We have to traverse the tree + // + // BEWARE: There is no valid shortcut for tree traversal. + // Even if eKidsArray.size()==numKids, this does not imply that + // eKidsArray can be accessed with the index of the page directly. + // The tree could have an arbitrary complex structure because + // internal nodes with no leaves (page objects) are not forbidden + // by the PDF spec. 
+ while( it != rKidsArray.end() ) + { + if(!(*it).IsReference() ) + { + PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n", + nPageNum, (*it).GetDataTypeString()); + return NULL; + } - // is the array large enough to potentially have the page? - if( static_cast(nPageNum) < rIterArray.GetSize() ) - { - PdfObject* pPageNode = GetPageNodeFromArray( nPageNum, - rIterArray, rLstParents ); - if ( pPageNode ) // and if not, search further - return pPageNode; - } - } - else if( (*it).IsReference() ) - { PdfObject* pChild = GetRoot()->GetOwner()->GetObject( (*it).GetReference() ); - if (!pChild) + if (!pChild) { - PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Child not found: %s\n", - nPageNum, (*it).GetReference().ToString().c_str()); + PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Child not found: %s\n", + nPageNum, (*it).GetReference().ToString().c_str()); return NULL; } - if( this->IsTypePages(pChild) ) + if( this->IsTypePages(pChild) ) { int childCount = GetChildCount( pChild ); if( childCount < nPageNum + 1 ) // Pages are 0 based, but count is not { - // skip this page node - // and go to the next one + // skip this page tree node + // and go to the next child in rKidsArray nPageNum -= childCount; } else { + // page is in the subtree of pChild + // => call GetPageNode() recursively + rLstParents.push_back( pParent ); + + if ( std::find( rLstParents.begin(), rLstParents.end(), pChild ) + != rLstParents.end() ) // cycle in parent list detected, fend + { // off security vulnerability similar to CVE-2017-8054 (infinite recursion) + std::ostringstream oss; + oss << "Cycle in page tree: child in /Kids array of object " + << ( *(rLstParents.rbegin()) )->Reference().ToString() + << " back-references to object " << pChild->Reference() + .ToString() << " one of whose descendants the former is."; + PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() ); + } + 
return this->GetPageNode( nPageNum, pChild, rLstParents ); } } - else if( this->IsTypePage(pChild) ) + else if( this->IsTypePage(pChild) ) { if( 0 == nPageNum ) { + // page found rLstParents.push_back( pParent ); return pChild; - } + } // Skip a normal page if(nPageNum > 0 ) @@ -448,100 +426,16 @@ PdfObject* PdfPagesTree::GetPageNode( in "Invalid datatype referenced in kids array: %s\n" "Reference to invalid object: %i %i R\n", nPageNum, pChild->GetDataTypeString(), nLogObjNum, nLogGenNum); + return NULL; } - } - else - { - PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n", - nPageNum, (*it).GetDataTypeString()); - return NULL; - } - - ++it; - } - } - - return NULL; -} - -PdfObject* PdfPagesTree::GetPageNodeFromArray( int nPageNum, const PdfArray & rKidsArray, PdfObjectList & rLstParents ) -{ - if( static_cast(nPageNum) >= rKidsArray.GetSize() ) - { - PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i from array of size %i\n", - nPageNum, rKidsArray.size() ); - return NULL; - } - - // TODO: Fill cache immediately with all pages - // in this kids array - PdfVariant rVar = rKidsArray[nPageNum]; - while( true ) - { - if( rVar.IsArray() ) - { - // Fixes some broken PDFs who have trees with 1 element kids arrays - // Recursive call removed to prevent stack overflow (CVE-2017-8054) - // replaced by the following inside this conditional incl. continue - const PdfArray & rVarArray = rVar.GetArray(); - if (rVarArray.GetSize() == 0) - { - PdfError::LogMessage( eLogSeverity_Critical, "Trying to access" - " first page index of empty array" ); - return NULL; - } - PdfVariant rVarFirstEntry = rVarArray[0]; // avoids use-after-free - rVar = rVarFirstEntry; // in this line (rVar-ref'd array is freed) - continue; - } - else if( !rVar.IsReference() ) - { - PODOFO_RAISE_ERROR_INFO( ePdfError_NotImplemented, "Cannot handle inline pages." 
); - } - - PdfObject* pgObject = GetRoot()->GetOwner()->GetObject( rVar.GetReference() ); - if(pgObject==NULL) - { - PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, "Invalid reference." ); - } - - //printf("Reading %s\n", pgObject->Reference().ToString().c_str()); - // make sure the object is a /Page and not a /Pages with a single kid - if( this->IsTypePage(pgObject) ) - { - return pgObject; - } - // it's a /Pages with a single kid, so dereference and try again... - if (this->IsTypePages(pgObject) ) - { - if( !pgObject->GetDictionary().HasKey( "Kids" ) ) - return NULL; - - if ( std::find( rLstParents.begin(), rLstParents.end(), pgObject ) - != rLstParents.end() ) // cycle in parent list detected, fend - { // off security vulnerability CVE-2017-8054 (infinite recursion) - std::ostringstream oss; - oss << "Cycle in page tree: child in /Kids array of object " - << ( *(rLstParents.rbegin()) )->Reference().ToString() - << " back-references to object " << pgObject->Reference() - .ToString() << " one of whose descendants the former is."; - - PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() ); - } - - rLstParents.push_back( pgObject ); - rVar = *(pgObject->GetDictionary().GetKey( "Kids" )); - } else { - // Reference to unexpected object - PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, "Reference to unexpected object." 
); + ++it; } - } return NULL; } -bool PdfPagesTree::IsTypePage(const PdfObject* pObject) const +bool PdfPagesTree::IsTypePage(const PdfObject* pObject) const { if( !pObject ) return false; @@ -552,7 +446,7 @@ bool PdfPagesTree::IsTypePage(const PdfO return false; } -bool PdfPagesTree::IsTypePages(const PdfObject* pObject) const +bool PdfPagesTree::IsTypePages(const PdfObject* pObject) const { if( !pObject ) return false; @@ -565,12 +459,12 @@ bool PdfPagesTree::IsTypePages(const Pdf int PdfPagesTree::GetChildCount( const PdfObject* pNode ) const { - if( !pNode ) + if( !pNode ) return 0; const PdfObject *pCount = pNode->GetIndirectKey( "Count" ); if( pCount != 0 ) { - return (pCount->GetDataType() == PoDoFo::ePdfDataType_Number) ? + return (pCount->GetDataType() == PoDoFo::ePdfDataType_Number) ? static_cast( pCount->GetNumber() ):0; } else { return 0; @@ -589,7 +483,7 @@ int PdfPagesTree::GetPosInKids( PdfObjec PdfArray::const_iterator it = rKids.begin(); int index = 0; - while( it != rKids.end() ) + while( it != rKids.end() ) { if( (*it).GetReference() == pPageObj->Reference() ) { @@ -606,10 +500,10 @@ int PdfPagesTree::GetPosInKids( PdfObjec return -1; } -void PdfPagesTree::InsertPageIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, +void PdfPagesTree::InsertPageIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, int nIndex, PdfObject* pPage ) { - if( !pParent || !pPage ) + if( !pParent || !pPage ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } @@ -625,17 +519,17 @@ void PdfPagesTree::InsertPageIntoNode( P newKids.reserve( oldKids.GetSize() + 1 ); - if( nIndex < 0 ) + if( nIndex < 0 ) { newKids.push_back( pPage->Reference() ); } int i = 0; - while( it != oldKids.end() ) + while( it != oldKids.end() ) { newKids.push_back( *it ); - if( i == nIndex ) + if( i == nIndex ) newKids.push_back( pPage->Reference() ); ++i; @@ -650,7 +544,7 @@ void PdfPagesTree::InsertPageIntoNode( P */ pParent->GetDictionary().AddKey( PdfName("Kids"), newKids 
); - + // 2. increase count PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin(); while( itParents != rlstParents.rend() ) @@ -658,16 +552,16 @@ void PdfPagesTree::InsertPageIntoNode( P this->ChangePagesCount( *itParents, 1 ); ++itParents; - } + } // 3. add parent key to the page pPage->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() ); } -void PdfPagesTree::InsertPagesIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, +void PdfPagesTree::InsertPagesIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, int nIndex, const std::vector& vecPages ) { - if( !pParent || !vecPages.size() ) + if( !pParent || !vecPages.size() ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } @@ -683,7 +577,7 @@ void PdfPagesTree::InsertPagesIntoNode( bool bIsPushedIn = false; int i=0; - for (PdfArray::const_iterator it=oldKids.begin(); it!=oldKids.end(); ++it, ++i ) + for (PdfArray::const_iterator it=oldKids.begin(); it!=oldKids.end(); ++it, ++i ) { if ( !bIsPushedIn && (nIndex < i) ) // Pushing before { @@ -697,7 +591,7 @@ void PdfPagesTree::InsertPagesIntoNode( } // If new kids are still not pushed in then they may be appending to the end - if ( !bIsPushedIn && ( (nIndex + 1) == static_cast(oldKids.size())) ) + if ( !bIsPushedIn && ( (nIndex + 1) == static_cast(oldKids.size())) ) { for (std::vector::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages) { @@ -707,13 +601,13 @@ void PdfPagesTree::InsertPagesIntoNode( } pParent->GetDictionary().AddKey( PdfName("Kids"), newKids ); - + // 2. increase count for ( PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin(); itParents != rlstParents.rend(); ++itParents ) { this->ChangePagesCount( *itParents, vecPages.size() ); - } + } // 3. 
add parent key to each of the pages for (std::vector::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages) @@ -722,10 +616,10 @@ void PdfPagesTree::InsertPagesIntoNode( } } -void PdfPagesTree::DeletePageFromNode( PdfObject* pParent, const PdfObjectList & rlstParents, +void PdfPagesTree::DeletePageFromNode( PdfObject* pParent, const PdfObjectList & rlstParents, int nIndex, PdfObject* pPage ) { - if( !pParent || !pPage ) + if( !pParent || !pPage ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } @@ -746,14 +640,14 @@ void PdfPagesTree::DeletePageFromNode( P this->ChangePagesCount( *itParents, -1 ); ++itParents; - } + } // 3. Remove empty pages nodes itParents = rlstParents.rbegin(); while( itParents != rlstParents.rend() ) { // Never delete root node - if( IsEmptyPageNode( *itParents ) && *itParents != GetRoot() ) + if( IsEmptyPageNode( *itParents ) && *itParents != GetRoot() ) { PdfObject* pParentOfNode = *(itParents + 1); int nKidsIndex = this->GetPosInKids( *itParents, pParentOfNode ); @@ -764,10 +658,10 @@ void PdfPagesTree::DeletePageFromNode( P } ++itParents; - } + } } -void PdfPagesTree::DeletePageNode( PdfObject* pParent, int nIndex ) +void PdfPagesTree::DeletePageNode( PdfObject* pParent, int nIndex ) { PdfArray kids = pParent->GetDictionary().GetKey( PdfName("Kids") )->GetArray(); kids.erase( kids.begin() + nIndex ); @@ -779,7 +673,7 @@ int PdfPagesTree::ChangePagesCount( PdfO // Increment or decrement inPagesDict's Count by inDelta, and return the new count. // Simply return the current count if inDelta is 0. 
int cnt = GetChildCount( pPageObj ); - if( 0 != nDelta ) + if( 0 != nDelta ) { cnt += nDelta ; pPageObj->GetDictionary().AddKey( "Count", PdfVariant( static_cast(cnt) ) ); @@ -788,7 +682,7 @@ int PdfPagesTree::ChangePagesCount( PdfO return cnt ; } -bool PdfPagesTree::IsEmptyPageNode( PdfObject* pPageNode ) +bool PdfPagesTree::IsEmptyPageNode( PdfObject* pPageNode ) { long lCount = GetChildCount( pPageNode ); bool bKidsEmpty = true; @@ -802,7 +696,7 @@ bool PdfPagesTree::IsEmptyPageNode( PdfO } /* -PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pPagesObject, +PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pPagesObject, std::deque & rListOfParents ) { // recurse through the pages tree nodes @@ -835,9 +729,9 @@ PdfObject* PdfPagesTree::GetPageNode( in } PdfVariant pgVar = kidsArray[ nPageNum ]; - while ( true ) + while ( true ) { - if ( pgVar.IsArray() ) + if ( pgVar.IsArray() ) { // Fixes some broken PDFs who have trees with 1 element kids arrays return GetPageNodeFromTree( nPageNum, pgVar.GetArray(), rListOfParents ); @@ -857,8 +751,8 @@ PdfObject* PdfPagesTree::GetPageNode( in rListOfParents.push_back( pgObject ); pgVar = *(pgObject->GetDictionary().GetKey( "Kids" )); } - } - else + } + else { return GetPageNodeFromTree( nPageNum, kidsArray, rListOfParents ); } diff -rupN podofo-0.9.6/src/doc/PdfPagesTree.h podofo-0.9.6-new/src/doc/PdfPagesTree.h --- podofo-0.9.6/src/doc/PdfPagesTree.h 2014-06-15 14:27:46.000000000 +0200 +++ podofo-0.9.6-new/src/doc/PdfPagesTree.h 2018-12-19 22:42:36.898106630 +0100 @@ -190,7 +190,6 @@ class PODOFO_DOC_API PdfPagesTree : publ PdfPagesTree(); // don't allow construction from nothing! 
PdfObject* GetPageNode( int nPageNum, PdfObject* pParent, PdfObjectList & rLstParents ); - PdfObject* GetPageNodeFromArray( int nPageNum, const PdfArray & rKidsArray, PdfObjectList & rLstParents ); int GetChildCount( const PdfObject* pNode ) const; diff -rupN podofo-0.9.6/test/unit/PagesTreeTest.cpp podofo-0.9.6-new/test/unit/PagesTreeTest.cpp --- podofo-0.9.6/test/unit/PagesTreeTest.cpp 2016-05-12 22:08:20.000000000 +0200 +++ podofo-0.9.6-new/test/unit/PagesTreeTest.cpp 2018-12-19 22:42:36.899106551 +0100 @@ -22,6 +22,8 @@ #include +#include + #define PODOFO_TEST_PAGE_KEY "PoDoFoTestPageNumber" #define PODOFO_TEST_NUM_PAGES 100 @@ -70,6 +72,58 @@ void PagesTreeTest::testEmptyDoc() CPPUNIT_ASSERT_THROW( writer.GetPage( 1 ), PdfError ); } +void PagesTreeTest::testCyclicTree() +{ + for (int pass=0; pass < 2; pass++) + { + PdfMemDocument doc; + CreateCyclicTree( doc, pass==1); + //doc.Write(pass==0?"tree_valid.pdf":"tree_cyclic.pdf"); + for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++) + { + if (pass==0) + { + // pass 0: + // valid tree without cycles should yield all pages + PdfPage* pPage = doc.GetPage( pagenum ); + CPPUNIT_ASSERT_EQUAL( pPage != NULL, true ); + CPPUNIT_ASSERT_EQUAL( IsPageNumber( pPage, pagenum ), true ); + } + else + { + // pass 1: + // cyclic tree must throw exception to prevent infinite recursion + CPPUNIT_ASSERT_THROW( doc.GetPage( pagenum ), PdfError ); + } + } + } +} + +void PagesTreeTest::testEmptyKidsTree() +{ + PdfMemDocument doc; + CreateEmptyKidsTree(doc); + //doc.Write("tree_zerokids.pdf"); + for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++) + { + PdfPage* pPage = doc.GetPage( pagenum ); + CPPUNIT_ASSERT_EQUAL( pPage != NULL, true ); + CPPUNIT_ASSERT_EQUAL( IsPageNumber( pPage, pagenum ), true ); + } +} + +void PagesTreeTest::testNestedArrayTree() +{ + PdfMemDocument doc; + CreateNestedArrayTree(doc); + //doc.Write("tree_nested_array.pdf"); + for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++) + { + 
PdfPage* pPage = doc.GetPage( pagenum ); + CPPUNIT_ASSERT_EQUAL( pPage == NULL, true ); + } +} + void PagesTreeTest::testCreateDelete() { PdfMemDocument writer; @@ -354,6 +408,152 @@ void PagesTreeTest::CreateTestTreeCustom pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast(PODOFO_TEST_NUM_PAGES) ); } +std::vector PagesTreeTest::CreateSamplePages( PdfMemDocument & rDoc, + int nPageCount) +{ + PdfFont* pFont; + + // create font + pFont = rDoc.CreateFont( "Arial" ); + if( !pFont ) + { + PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); + } + pFont->SetFontSize( 16.0 ); + + std::vector pPage(nPageCount); + for (int i = 0; i < nPageCount; ++i) + { + pPage[i] = new PdfPage( PdfPage::CreateStandardPageSize( ePdfPageSize_A4 ), + &(rDoc.GetObjects()) ); + pPage[i]->GetObject()->GetDictionary().AddKey( PODOFO_TEST_PAGE_KEY, + static_cast(i) ); + + PdfPainter painter; + painter.SetPage( pPage[i] ); + painter.SetFont( pFont ); + std::ostringstream os; + os << "Page " << i+1; + painter.DrawText( 200, 200, os.str() ); + painter.FinishPage(); + } + + return pPage; +} + +std::vector PagesTreeTest::CreateNodes( PdfMemDocument & rDoc, + int nNodeCount) +{ + std::vector pNode(nNodeCount); + + for (int i = 0; i < nNodeCount; ++i) + { + pNode[i]=rDoc.GetObjects().CreateObject("Pages"); + // init required keys + pNode[i]->GetDictionary().AddKey( "Kids", PdfArray()); + pNode[i]->GetDictionary().AddKey( "Count", PdfVariant(static_cast(0L))); + } + + return pNode; +} + +void PagesTreeTest::CreateCyclicTree( PoDoFo::PdfMemDocument & rDoc, + bool bCreateCycle ) +{ + const int COUNT = 3; + + std::vector pPage=CreateSamplePages( rDoc, COUNT ); + std::vector pNode=CreateNodes( rDoc, 2 ); + + // manually insert pages into pagetree + PdfObject* pRoot = rDoc.GetPagesTree()->GetObject(); + + // tree layout (for !bCreateCycle): + // + // root + // +-- node0 + // +-- node1 + // | +-- page0 + // | +-- page1 + // \-- page2 + + // root node + AppendChildNode(pRoot, pNode[0]); + + // tree node 0 
+ AppendChildNode(pNode[0], pNode[1]); + AppendChildNode(pNode[0], pPage[2]->GetObject()); + + // tree node 1 + AppendChildNode(pNode[1], pPage[0]->GetObject()); + AppendChildNode(pNode[1], pPage[1]->GetObject()); + + if (bCreateCycle) + { + // invalid tree: Cycle!!! + // was not detected in PdfPagesTree::GetPageNode() rev. 1937 + pNode[0]->GetIndirectKey("Kids")->GetArray()[0]=pRoot->Reference(); + } +} + +void PagesTreeTest::CreateEmptyKidsTree( PoDoFo::PdfMemDocument & rDoc ) +{ + const int COUNT = 3; + + std::vector pPage=CreateSamplePages( rDoc, COUNT ); + std::vector pNode=CreateNodes( rDoc, 3 ); + + // manually insert pages into pagetree + PdfObject* pRoot = rDoc.GetPagesTree()->GetObject(); + + // tree layout: + // + // root + // +-- node0 + // | +-- page0 + // | +-- page1 + // | +-- page2 + // +-- node1 + // \-- node2 + + // root node + AppendChildNode(pRoot, pNode[0]); + AppendChildNode(pRoot, pNode[1]); + AppendChildNode(pRoot, pNode[2]); + + // tree node 0 + AppendChildNode(pNode[0], pPage[0]->GetObject()); + AppendChildNode(pNode[0], pPage[1]->GetObject()); + AppendChildNode(pNode[0], pPage[2]->GetObject()); + + // tree node 1 and node 2 are left empty: this is completely valid + // according to the PDF spec, i.e. 
the required keys may have the + // values "/Kids [ ]" and "/Count 0" +} + +void PagesTreeTest::CreateNestedArrayTree( PoDoFo::PdfMemDocument & rDoc ) +{ + const int COUNT = 3; + + std::vector pPage=CreateSamplePages( rDoc, COUNT ); + PdfObject* pRoot = rDoc.GetPagesTree()->GetObject(); + + // create kids array + PdfArray kids; + for (int i=0; i < COUNT; i++) + { + kids.push_back( pPage[i]->GetObject()->Reference() ); + pPage[i]->GetObject()->GetDictionary().AddKey( PdfName("Parent"), pRoot->Reference()); + } + + // create nested kids array + PdfArray nested; + nested.push_back(kids); + + // manually insert pages into pagetree + pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast(COUNT) ); + pRoot->GetDictionary().AddKey( PdfName("Kids"), nested); +} bool PagesTreeTest::IsPageNumber( PoDoFo::PdfPage* pPage, int nNumber ) { @@ -367,3 +567,33 @@ bool PagesTreeTest::IsPageNumber( PoDoFo else return true; } + +void PagesTreeTest::AppendChildNode(PdfObject* pParent, PdfObject* pChild) +{ + // 1. Add the reference of the new child to the kids array of pParent + PdfArray kids; + PdfObject* oldKids=pParent->GetIndirectKey("Kids"); + if (oldKids && oldKids->IsArray()) kids=oldKids->GetArray(); + kids.push_back(pChild->Reference()); + pParent->GetDictionary().AddKey( PdfName("Kids"), kids); + + // 2. If the child is a page (leaf node), increase count of every parent + // (which also includes pParent) + if( pChild->GetDictionary().GetKeyAsName( PdfName( "Type" ) ) + == PdfName( "Page" ) ) + { + PdfObject* node=pParent; + while (node) + { + pdf_int64 count=0; + if (node->GetIndirectKey("Count")) count=node->GetIndirectKey("Count")->GetNumber(); + count++; + node->GetDictionary().AddKey( PdfName("Count"), count); + + node=node->GetIndirectKey("Parent"); + } + } + + // 3. 
Add Parent key to the child + pChild->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference()); +} diff -rupN podofo-0.9.6/test/unit/PagesTreeTest.h podofo-0.9.6-new/test/unit/PagesTreeTest.h --- podofo-0.9.6/test/unit/PagesTreeTest.h 2009-05-08 19:45:52.000000000 +0200 +++ podofo-0.9.6-new/test/unit/PagesTreeTest.h 2018-12-19 22:42:36.899106551 +0100 @@ -21,11 +21,14 @@ #ifndef _PAGES_TREE_TEST_H_ #define _PAGES_TREE_TEST_H_ +#include + #include namespace PoDoFo { class PdfMemDocument; class PdfPage; +class PdfObject; }; /** This test tests the class PdfPagesTree @@ -35,6 +38,9 @@ class PagesTreeTest : public CppUnit::Te CPPUNIT_TEST_SUITE( PagesTreeTest ); CPPUNIT_TEST( testEmptyTree ); CPPUNIT_TEST( testEmptyDoc ); + CPPUNIT_TEST( testCyclicTree ); + CPPUNIT_TEST( testEmptyKidsTree ); + CPPUNIT_TEST( testNestedArrayTree ); CPPUNIT_TEST( testCreateDelete ); CPPUNIT_TEST( testGetPagesCustom ); CPPUNIT_TEST( testGetPagesPoDoFo ); @@ -52,6 +58,9 @@ class PagesTreeTest : public CppUnit::Te void testEmptyTree(); void testEmptyDoc(); + void testCyclicTree(); + void testEmptyKidsTree(); + void testNestedArrayTree(); void testCreateDelete(); void testGetPagesCustom(); void testGetPagesPoDoFo(); @@ -98,7 +107,58 @@ class PagesTreeTest : public CppUnit::Te */ void CreateTestTreeCustom( PoDoFo::PdfMemDocument & rDoc ); + /** + * Create a pages tree with cycles to test prevention of endless + * recursion as mentioned in different CVE reports. + * + * \param bCreateCycle if true a cyclic tree is created, otherwise a + * valid tree without cycles + */ + void CreateCyclicTree( PoDoFo::PdfMemDocument & rDoc, + bool bCreateCycle ); + + /** + * Create a pages tree with nodes containing empty kids. + * + * This is completely valid according to the PDF spec, i.e. the + * required keys may have the values "/Kids [ ]" and "/Count 0" + * Such a tree must still be parsable by a conforming reader: + * + *
The tree contains nodes of two types—intermediate + * nodes, called page tree nodes, and leaf nodes, called page + * objects—whose form is described in the subsequent subclauses. + * Conforming products shall be prepared to handle any form + * of tree structure built of such nodes.
+ */ + void CreateEmptyKidsTree( PoDoFo::PdfMemDocument & rDoc ); + + /** + * Create a pages tree with a nested kids array. + * + * Such a tree is not valid according to the PDF spec, which requires the key + * "Kids" to be an array of indirect references. And the children shall + * only be page objects or other page tree nodes. + */ + void CreateNestedArrayTree( PoDoFo::PdfMemDocument & rDoc ); + + /** + * Create page object nodes (leaf nodes), + * where every page object has an additional + * key PoDoFoTestPageNumber with the original + * page number of the page. + */ + std::vector CreateSamplePages( PoDoFo::PdfMemDocument & rDoc, + int nPageCount); + + /** + * Create page tree nodes (internal nodes) + */ + std::vector CreateNodes( PoDoFo::PdfMemDocument & rDoc, + int nNodeCount); + bool IsPageNumber( PoDoFo::PdfPage* pPage, int nNumber ); + + void AppendChildNode(PoDoFo::PdfObject* pParent, PoDoFo::PdfObject* pChild); }; #endif // _PAGES_TREE_TEST_H_