From 4569e4eea99e91181836c674c4614664fc18cf01 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Thu, 16 Sep 2004 21:04:49 +0000 Subject: [PATCH] Backported search. Clean, elegant and SOO FAST after the 1st loop (well, if you have enough ram). Based on aacid's search code. This implements case sensitive search too (a little tricky but it's worth it). Finally done. svn path=/branches/kpdf_experiments/kdegraphics/kpdf/; revision=347051 --- kpdf/QOutputDev.cpp | 52 +++++++++++-- kpdf/QOutputDev.h | 43 ++++++++++- kpdf/document.cpp | 175 +++++++++++++++++++++----------------------- kpdf/document.h | 4 +- kpdf/kpdf_part.cpp | 11 ++- kpdf/page.cpp | 112 ++++++++++++++++------------ kpdf/page.h | 20 ++--- 7 files changed, 259 insertions(+), 158 deletions(-) diff --git a/kpdf/QOutputDev.cpp b/kpdf/QOutputDev.cpp index df0662dd3..6e3a5f91f 100644 --- a/kpdf/QOutputDev.cpp +++ b/kpdf/QOutputDev.cpp @@ -27,10 +27,7 @@ // NOTE: XPDF/Splash implementation dependant code will be marked with '###' -//------------------------------------------------------------------------ -// KPDFOutputDev -//------------------------------------------------------------------------ - +// BEGIN KPDFOutputDev KPDFOutputDev::KPDFOutputDev(SplashColor paperColor) : SplashOutputDev(splashModeRGB8, false, paperColor), m_pixmapWidth( -1 ), m_pixmapHeight( -1 ), m_pixmap( 0 ), m_text( 0 ) @@ -71,8 +68,6 @@ TextPage * KPDFOutputDev::takeTextPage() return text; } - - void KPDFOutputDev::startPage(int pageNum, GfxState *state) { m_pageNum = pageNum; @@ -96,7 +91,6 @@ void KPDFOutputDev::endPage() if ( bw != m_pixmapWidth || bh != m_pixmapHeight ) { // it may happen (in fact it doesn't) that we need rescaling - kdWarning() << "Pixmap at page '" << m_pageNum << "' needed rescale." 
<< endl; m_pixmap = new QPixmap( img->smoothScale( m_pixmapWidth, m_pixmapHeight ) ); } @@ -137,3 +131,47 @@ GBool KPDFOutputDev::beginType3Char(GfxState *state, double x, double y, double m_text->addChar(state, x, y, dx, dy, code, u, uLen); return SplashOutputDev::beginType3Char(state, x, y, dx, dy, code, u, uLen); } +// END KPDFOutputDev + + +// BEGIN KPDFTextDev +KPDFTextDev::KPDFTextDev() +{ + m_text = new TextPage( gFalse ); +} + +KPDFTextDev::~KPDFTextDev() +{ + delete m_text; +} + +TextPage * KPDFTextDev::takeTextPage() +{ + TextPage * t = m_text; + m_text = 0; + return t; +} + +void KPDFTextDev::startPage(int, GfxState *state) +{ + if ( !m_text ) + m_text = new TextPage( gFalse ); + m_text->startPage(state); +} + +void KPDFTextDev::endPage() +{ + m_text->endPage(); + m_text->coalesce(gTrue); +} + +void KPDFTextDev::updateFont(GfxState *state) +{ + m_text->updateFont(state); +} + +void KPDFTextDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, Unicode *u, int uLen) +{ + m_text->addChar(state, x, y, dx, dy, code, u, uLen); +} +// END KPDFTextDev diff --git a/kpdf/QOutputDev.h b/kpdf/QOutputDev.h index ac2cbf057..77b126ace 100644 --- a/kpdf/QOutputDev.h +++ b/kpdf/QOutputDev.h @@ -28,7 +28,7 @@ class TextPage; class KPDFPage; /** - * @short A SplashOutputDev rendered that grab text and links. + * @short A SplashOutputDev renderer that grabs text and links. * * This output device: * - renders the page using SplashOutputDev (its parent) @@ -72,4 +72,45 @@ private: TextPage * m_text; }; + +/** + * @short Collect text into a takeable TextPage. + * + * This is the simplest OutputDev. It harvests text from currently + * rendered page and provides a method for getting the TextPage. + * Xpdf's textOutputDev can't return a textpage, unfortunately. 
+ */ +class KPDFTextDev : public OutputDev +{ +public: + KPDFTextDev(); + virtual ~KPDFTextDev(); + + // takes pointers out of the class (so deletion it's up to others) + TextPage * takeTextPage(); + + /** inherited from OutputDev */ + // top left corner is (0,0) + virtual GBool upsideDown() { return gTrue; } + // use draw char to get text data + virtual GBool useDrawChar() { return gTrue; } + // use drawChar even for Type3 chars + virtual GBool interpretType3Chars() { return gFalse; } + // do not pass non-text to this device + virtual GBool needNonText() { return gFalse; } + + // Start a page. + virtual void startPage(int, GfxState *state); + // End a page. + virtual void endPage(); + //----- update text state + virtual void updateFont(GfxState *state); + //----- text drawing + virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, Unicode *u, int uLen); + +private: + // text page generated by execution + TextPage * m_text; +}; + #endif diff --git a/kpdf/document.cpp b/kpdf/document.cpp index 243bb8aa4..da0081d73 100644 --- a/kpdf/document.cpp +++ b/kpdf/document.cpp @@ -13,11 +13,13 @@ #include #include #include +#include +#include +#include // local includes #include "PDFDoc.h" #include "QOutputDev.h" -//#include "TextOutputDev.h" #include "kpdf_error.h" #include "document.h" @@ -41,6 +43,14 @@ public: float currentPosition; QValueVector< KPDFPage* > pages; + // find related + QString lastSearchText; + long lastSearchOptions; + KPDFPage * lastSearchPage; + + // filtering related + QString filterString; + // observers related (note: won't delete oservers) QMap< int, KPDFDocumentObserver* > observers; }; @@ -59,6 +69,7 @@ KPDFDocument::KPDFDocument() d->pdfdoc = 0; d->currentPage = -1; d->currentPosition = 0; + d->lastSearchPage = 0; SplashColor paperColor; paperColor.rgb8 = splashMakeRGB8( 0xff, 0xff, 0xff ); d->kpdfOutputDev = new KPDFOutputDev( paperColor ); @@ -163,17 +174,22 @@ void 
KPDFDocument::requestPixmap( int id, uint page, int width, int height, bool // in-place Pixmap generation for syncronous requests if ( !kp->hasPixmap( id, width, height ) ) { - // set KPDFPage pointer to outputdevice for links/text harvesting - d->kpdfOutputDev->setParams( width, height, false ); - // compute dpi used to get an image with desired width and height double fakeDpiX = width * 72.0 / kp->width(), fakeDpiY = height * 72.0 / kp->height(); + + // setup kpdf output device: text page is generated only if we are at 72dpi. + // since we can pre-generate the TextPage at the right res.. why not? + bool genTextPage = !kp->hasSearchPage() && (width == kp->width()) && (height == kp->height()); + d->kpdfOutputDev->setParams( width, height, genTextPage ); + d->docLock.lock(); - d->pdfdoc->displayPage( d->kpdfOutputDev, page + 1, fakeDpiX, fakeDpiY, 0, true, true ); + d->pdfdoc->displayPage( d->kpdfOutputDev, page + 1, fakeDpiX, fakeDpiY, 0, true, false/*dolinks*/ ); d->docLock.unlock(); kp->setPixmap( id, d->kpdfOutputDev->takePixmap() ); + if ( genTextPage ) + kp->setSearchPage( d->kpdfOutputDev->takeTextPage() ); d->observers[id]->notifyPixmapChanged( page ); } @@ -200,101 +216,80 @@ void KPDFDocument::slotSetCurrentPagePosition( int page, float position ) pageChanged(); } -void KPDFDocument::slotFind( bool /*nextMatch*/, const QString & /*text*/ ) +void KPDFDocument::slotSetFilter( const QString & pattern ) { -/* - TextOutputDev *textOut; - Unicode *u; - bool found; - double xMin1, yMin1, xMax1, yMax1; - int len, pg; + d->filterString = pattern; + if ( pattern.length() > 3 ) + sendFilteredPageList(); +} - // This is more or less copied from what xpdf does, surely can be optimized - len = s.length(); - u = (Unicode *)gmalloc(len * sizeof(Unicode)); - for (int i = 0; i < len; ++i) u[i] = (Unicode)(s.latin1()[i] & 0xff); - - // search current - found = m_outputDev->find(u, len, next); - - if (!found) - { - // search following pages - textOut = new TextOutputDev(NULL, 
gTrue, gFalse, gFalse); - if (!textOut->isOk()) +void KPDFDocument::slotFind( const QString & t, long opt ) +{ + // reload last options if in 'find next' case + long options = t.isEmpty() ? d->lastSearchOptions : opt; + QString text = t.isEmpty() ? d->lastSearchText : t; + if ( !t.isEmpty() ) { - gfree(u); - delete textOut; - return; + d->lastSearchText = t; + d->lastSearchOptions = opt; } - pg = m_currentPage + 1; - while(!found && pg <= d->pdfdoc->getNumPages()) - { - m_docMutex.lock(); - d->pdfdoc->displayPage(textOut, pg, 72, 72, 0, gTrue, gFalse); - m_docMutex.unlock(); - found = textOut->findText(u, len, gTrue, gTrue, gFalse, gFalse, &xMin1, &yMin1, &xMax1, &yMax1); - if (!found) pg++; - } + // check enabled options (only caseSensitive support until now) + bool caseSensitive = options & KFindDialog::CaseSensitive; - if (!found && m_currentPage != 1) - { - if (KMessageBox::questionYesNo(widget(), i18n("End of document reached.\nContinue from the beginning?")) == KMessageBox::Yes) - { - // search previous pages - pg = 1; - while(!found && pg < m_currentPage) + // continue checking last SearchPage first (if it is the current page) + KPDFPage * foundPage = 0; + int currentPage = d->currentPage; + int pageCount = d->pages.count(); + if ( d->lastSearchPage && (int)d->lastSearchPage->number() == currentPage ) + if ( d->lastSearchPage->hasText( text, caseSensitive, false ) ) + foundPage = d->lastSearchPage; + else { - m_docMutex.lock(); - d->pdfdoc->displayPage(textOut, pg, 72, 72, 0, gTrue, gFalse); - m_docMutex.unlock(); - found = textOut->findText(u, len, gTrue, gTrue, gFalse, gFalse, &xMin1, &yMin1, &xMax1, &yMax1); - if (!found) pg++; - } - } - } - - delete textOut; - if (found) - { - kdDebug() << "found at " << pg << endl; - goToPage(pg); - // xpdf says: can happen that we do not find the text if coalescing is bad OUCH - //FIXME Enrico: expanded "m_outputDev(the widget)->find(u, len, false);" above: - bool PageWidget::find( Unicode * u, int len, bool next ) - 
{return false; TODO !!restore!! Enrico - bool b; - if (!next) - { - // ensure we are searching the whole page - m_xMin = 0; - m_yMin = 0; + d->lastSearchPage->hilightLastSearch( false ); + currentPage++; + pageCount--; } - b = m_outputdev(a QOut..) -> find(u, len, !next, true, next, false, &m_xMin, &m_yMin, &m_xMax, &m_yMax); - m_xMin = m_xMin / m_zoomFactor; - m_yMin = m_yMin / m_zoomFactor; - m_xMax = m_xMax / m_zoomFactor; - m_yMax = m_yMax / m_zoomFactor; - m_selection = b; - updateContents(); - return b; - } - // expanded ends here + if ( !foundPage ) + // loop through the whole document + for ( int i = 0; i < pageCount; i++ ) + { + if ( currentPage >= pageCount ) + { + if ( KMessageBox::questionYesNo(0, i18n("End of document reached.\nContinue from the beginning?")) == KMessageBox::Yes ) + currentPage = 0; + else + break; + } + KPDFPage * page = d->pages[ currentPage ]; + if ( !page->hasSearchPage() ) + { + // build a TextPage using the lightweight KPDFTextDev generator.. + KPDFTextDev td; + d->docLock.lock(); + d->pdfdoc->displayPage( &td, page->number()+1, 72, 72, 0, true, false ); + d->docLock.unlock(); + // ..and attach it to the page + page->setSearchPage( td.takeTextPage() ); + } + if ( page->hasText( text, caseSensitive, true ) ) + { + foundPage = page; + break; + } + currentPage++; + } + + if ( foundPage ) + { + d->lastSearchPage = foundPage; + foundPage->hilightLastSearch( true ); + slotSetCurrentPage( foundPage->number() ); + foreachObserver( notifyPixmapChanged( foundPage->number() ) ); } else - { - if (next) KMessageBox::information(widget(), i18n("No more matches found for '%1'.").arg(s)); - else KMessageBox::information(widget(), i18n("No matches found for '%1'.").arg(s)); - } - } - - if (found) m_findText = s; - else m_findText = QString::null; - - gfree(u); -*/ + KMessageBox::information( 0, i18n("No matches found for '%1'.").arg(text) ); } void KPDFDocument::slotGoToLink( /* QString anchor */ ) @@ -328,6 +323,7 @@ void 
KPDFDocument::deletePages() delete d->pages[i]; d->pages.clear(); d->currentPage = -1; + d->lastSearchPage = 0; } /** TO BE IMPORTED: @@ -406,5 +402,4 @@ void ThumbnailList::customEvent(QCustomEvent *e) } */ - #include "document.moc" diff --git a/kpdf/document.h b/kpdf/document.h index 71d258f51..98d19510e 100644 --- a/kpdf/document.h +++ b/kpdf/document.h @@ -33,7 +33,6 @@ public: // commands from the Document to all observers virtual void pageSetup( const QValueList & /*pages*/ ) {}; virtual void pageSetCurrent( int /*pageNumber*/, float /*position*/ ) {}; - //virtual void pageSetHilight( int /*x*/, int /*y*/, int /*width*/, int /*height*/ ) {}; }; #define PAGEWIDGET_ID 1 @@ -72,7 +71,8 @@ public slots: // document commands via slots void slotSetCurrentPage( int page ); void slotSetCurrentPagePosition( int page, float position ); - void slotFind( bool nextMatch, const QString & text = "" ); + void slotSetFilter( const QString & pattern ); + void slotFind( const QString & text = "", long options = 0 ); void slotGoToLink( /* QString anchor */ ); signals: diff --git a/kpdf/kpdf_part.cpp b/kpdf/kpdf_part.cpp index a0eed9482..8b3f906db 100644 --- a/kpdf/kpdf_part.cpp +++ b/kpdf/kpdf_part.cpp @@ -103,6 +103,7 @@ Part::Part(QWidget *parentWidget, const char *widgetName, // Page Traversal actions m_gotoPage = KStdAction::gotoPage( this, SLOT( slotGoToPage() ), ac, "goto_page" ); + m_gotoPage->setShortcut( "CTRL+G" ); m_prevPage = KStdAction::prior(this, SLOT(slotPreviousPage()), ac, "previous_page"); m_prevPage->setWhatsThis( i18n( "Moves to the previous page of the document" ) ); @@ -166,7 +167,6 @@ bool Part::openFile() { bool ok = document->openFile( m_file ); m_find->setEnabled( ok ); - m_findNext->setEnabled( ok ); return ok; } @@ -257,14 +257,17 @@ void Part::slotGotoLast() void Part::slotFind() { - KFindDialog dlg(widget()); + KFindDialog dlg( widget() ); if (dlg.exec() == QDialog::Accepted) - document->slotFind( false, dlg.pattern() ); + { + 
m_findNext->setEnabled( true ); + document->slotFind( dlg.pattern(), dlg.options() ); + } } void Part::slotFindNext() { - document->slotFind( true ); + document->slotFind(); } void Part::slotSaveFileAs() diff --git a/kpdf/page.cpp b/kpdf/page.cpp index 7d47d9d4a..4f773ea1b 100644 --- a/kpdf/page.cpp +++ b/kpdf/page.cpp @@ -23,7 +23,9 @@ // TODO think about moving rendering ... KPDFPage::KPDFPage( int page, float w, float h, int r ) - : m_number( page ), m_rotation( r ), m_width( w ), m_height( h ), m_text( 0 ) + : m_number( page ), m_rotation( r ), m_width( w ), m_height( h ), + m_sEnabled( false ), m_sLeft( 0 ), m_sTop( 0 ), m_sRight( 0 ), + m_sBottom( 0 ), m_text( 0 ) { } @@ -36,6 +38,27 @@ KPDFPage::~KPDFPage() } +bool KPDFPage::hasPixmap( int id, int width, int height ) const +{ + if ( !m_pixmaps.contains( id ) ) + return false; + QPixmap * p = m_pixmaps[ id ]; + return p ? ( p->width() == width && p->height() == height ) : false; +} + +bool KPDFPage::hasSearchPage() const +{ + return (m_text != 0); +} + +bool KPDFPage::hasLink( int mouseX, int mouseY ) const +{ + //TODO this. + //Sample implementation using a small rect as 'active' link zone + return QRect( 20,20, 100,50 ).contains( mouseX, mouseY ); +} + +// BEGIN commands (paint / search) void KPDFPage::drawPixmap( int id, QPainter * p, const QRect & limits, int width, int height ) const { QPixmap * pixmap = 0; @@ -79,34 +102,53 @@ void KPDFPage::drawPixmap( int id, QPainter * p, const QRect & limits, int width p->drawLine( 0, 0, width, height ); p->drawLine( 0, height, width, 0 ); } - // draw selection (FIXME Enrico: move selection stuff inside PAGE!!) 
- /*if ( there is something to hilght ) - p->setBrush(Qt::SolidPattern); - p->setPen(QPen(Qt::black, 1)); // should not be necessary bug a Qt bug makes it necessary - p->setRasterOp(Qt::NotROP); - p->drawRect(qRound(m_xMin*m_zoomFactor), qRound(m_yMin*m_zoomFactor), qRound((m_xMax- m_xMin)*m_zoomFactor), qRound((m_yMax- m_yMin)*m_zoomFactor)); - */ + // draw selection + if ( m_sEnabled ) + { + int x = (int)( m_sLeft * width / m_width ), + y = (int)( m_sTop * height / m_height ), + w = (int)( m_sRight * width / m_width ) - x, + h = (int)( m_sBottom * height / m_height ) - y; + if ( w > 0 && h > 0 ) + { + p->setBrush( Qt::SolidPattern ); + p->setPen( QPen( Qt::black, 1 ) ); // should not be necessary bug a Qt bug makes it necessary + p->setRasterOp( Qt::NotROP ); + p->drawRect( x, y, w, h ); + } + } } // else draw a blank area else p->fillRect( limits, Qt::white /*FIXME change to the page bg color*/ ); } - -bool KPDFPage::hasPixmap( int id, int width, int height ) const +bool KPDFPage::hasText( const QString & text, bool strictCase, bool fromTop ) { - if ( !m_pixmaps.contains( id ) ) + if ( !m_text ) return false; - QPixmap * p = m_pixmaps[ id ]; - return p ? ( p->width() == width && p->height() == height ) : false; + + const char * str = text.latin1(); + int len = text.length(); + Unicode *u = (Unicode *)gmalloc(len * sizeof(Unicode)); + for (int i = 0; i < len; ++i) + u[i] = (Unicode) str[i]; + + bool found = m_text->findText( u, len, fromTop ? gTrue : gFalse, gTrue, fromTop ? gFalse : gTrue, gFalse, &m_sLeft, &m_sTop, &m_sRight, &m_sBottom ); + if( found && strictCase ) + { + GString * orig = m_text->getText( m_sLeft, m_sTop, m_sRight, m_sBottom ); + found = !strcmp( text.latin1(), orig->getCString() ); + } + return found; } -bool KPDFPage::hasLink( int mouseX, int mouseY ) const +void KPDFPage::hilightLastSearch( bool on ) { - //TODO this. 
- //Sample implementation using a small rect as 'active' link zone - return QRect( 20,20, 100,50 ).contains( mouseX, mouseY ); + m_sEnabled = on; } +// END commands (paint / search) + void KPDFPage::setPixmap( int id, QPixmap * pixmap ) { @@ -115,38 +157,18 @@ void KPDFPage::setPixmap( int id, QPixmap * pixmap ) m_pixmaps[id] = pixmap; } +void KPDFPage::setSearchPage( TextPage * tp ) +{ + delete m_text; + m_text = tp; +} + /* -void KPDFPage::setPixmapOverlaySelection( const QRect & normalizedRect ); +void KPDFPage::setLinks( ..SomeStruct.. ) { //TODO this } + void KPDFPage::setPixmapOverlayNotations( ..DOMdescription.. ) { //TODO this } */ - -/* -void KPDFPage::setTextPage( TextOutputDev * textPage ) -{ - delete m_text; - m_text = 0; - if ( m_text ) - m_text = textPage; -} - -void KPDFPage::setLinks( ..SomeStruct.. ) -{ -} -*/ - -/*bool KPDFPage::hasText( QString & text ) -{ //TODO this - return text.isNull(); -// FIXME MOVED from the QOutputDev. Find over a textpage. -//bool find(Unicode *s, int len, GBool startAtTop, GBool stopAtBottom, GBool startAtLast, GBool stopAtLast, double *xMin, double *yMin, double *xMax, double *yMax) -//{return m_text -> findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, xMin, yMin, xMax, yMax);} -} - -const QRect & KPDFPage::textPosition() -{ //TODO this - return QRect(); -}*/ diff --git a/kpdf/page.h b/kpdf/page.h index 32467275e..a1417fa06 100644 --- a/kpdf/page.h +++ b/kpdf/page.h @@ -14,7 +14,7 @@ class QPainter; class QPixmap; //class QString; //class QRect; -class TextOutputDev; +class TextPage; /** * @short Collector for all the data belonging to a page. 
@@ -40,26 +40,28 @@ public: float ratio() const { return m_height / m_width; } float rotation() const { return m_rotation; } bool hasPixmap( int id, int width, int height ) const; + bool hasSearchPage() const; bool hasLink( int mouseX, int mouseY ) const; + + // commands void drawPixmap( int id, QPainter * p, const QRect & rect, int width, int height ) const; + bool hasText( const QString & text, bool strictCase, bool fromTop ); + void hilightLastSearch( bool enabled ); - // page contents setup *NOTE changes in progress* + // set page contents void setPixmap( int id, QPixmap * pixmap ); - /*void setTextPage( TextOutputDev * );*/ + void setSearchPage( TextPage * text ); /*void setLinks( ..SomeStruct.. ); or (better): */ - /*void setPixmapOverlaySelection( const QRect & normalizedRect );*/ /*void setPixmapOverlayNotations( ..DOMdescription.. );*/ - // FIND command - //bool hasText( QString & text ); - //const QRect & textPosition(); - private: int m_number, m_rotation; float m_width, m_height; + bool m_sEnabled; + double m_sLeft, m_sTop, m_sRight, m_sBottom; QMap m_pixmaps; - TextOutputDev * m_text; + TextPage * m_text; }; /* class KPDFLink