Backported search. Clean, elegant and very fast after the first loop (well, if

you have enough RAM). Based on aacid's search code. This implements
case-sensitive search too (a little tricky, but worth it). Finally done.

svn path=/branches/kpdf_experiments/kdegraphics/kpdf/; revision=347051
This commit is contained in:
Enrico Ros 2004-09-16 21:04:49 +00:00
parent 727d5dd57a
commit 4569e4eea9
7 changed files with 259 additions and 158 deletions

View file

@ -27,10 +27,7 @@
// NOTE: XPDF/Splash implementation dependant code will be marked with '###'
//------------------------------------------------------------------------
// KPDFOutputDev
//------------------------------------------------------------------------
// BEGIN KPDFOutputDev
KPDFOutputDev::KPDFOutputDev(SplashColor paperColor)
: SplashOutputDev(splashModeRGB8, false, paperColor),
m_pixmapWidth( -1 ), m_pixmapHeight( -1 ), m_pixmap( 0 ), m_text( 0 )
@ -71,8 +68,6 @@ TextPage * KPDFOutputDev::takeTextPage()
return text;
}
void KPDFOutputDev::startPage(int pageNum, GfxState *state)
{
m_pageNum = pageNum;
@ -96,7 +91,6 @@ void KPDFOutputDev::endPage()
if ( bw != m_pixmapWidth || bh != m_pixmapHeight )
{
// it may happen (in fact it doesn't) that we need rescaling
kdWarning() << "Pixmap at page '" << m_pageNum << "' needed rescale." << endl;
m_pixmap = new QPixmap( img->smoothScale( m_pixmapWidth, m_pixmapHeight ) );
}
@ -137,3 +131,47 @@ GBool KPDFOutputDev::beginType3Char(GfxState *state, double x, double y, double
m_text->addChar(state, x, y, dx, dy, code, u, uLen);
return SplashOutputDev::beginType3Char(state, x, y, dx, dy, code, u, uLen);
}
// END KPDFOutputDev
// BEGIN KPDFTextDev
// Create the device already owning an empty TextPage, built with the same
// gFalse argument that startPage() uses when it has to recreate the page.
KPDFTextDev::KPDFTextDev()
    : m_text( new TextPage( gFalse ) )
{
}
// Destroy the TextPage still held by the device. After takeTextPage() the
// member is 0, in which case the delete below does nothing.
KPDFTextDev::~KPDFTextDev()
{
delete m_text;
}
// Give the collected TextPage to the caller and forget about it: m_text is
// reset to 0, so the destructor will not delete the returned page and the
// caller becomes responsible for it.
TextPage * KPDFTextDev::takeTextPage()
{
    TextPage * const released = m_text;
    m_text = 0;
    return released;
}
// Begin collecting text for a new page. The page number is not used.
void KPDFTextDev::startPage(int, GfxState *state)
{
    // the previous TextPage may have been taken away: recreate it on demand
    if ( m_text == 0 )
    {
        m_text = new TextPage( gFalse );
    }
    m_text->startPage(state);
}
// Finish the current page: close the TextPage and coalesce the collected
// characters so the page can be searched.
void KPDFTextDev::endPage()
{
    // takeTextPage() leaves m_text at 0 until the next startPage(); ignore
    // callbacks that arrive in between instead of dereferencing null
    if ( !m_text )
        return;
    m_text->endPage();
    m_text->coalesce(gTrue);
}
// Forward font changes to the TextPage that is collecting the text.
void KPDFTextDev::updateFont(GfxState *state)
{
    // no TextPage (it was taken and no page has been started since): nothing to update
    if ( !m_text )
        return;
    m_text->updateFont(state);
}
// Record a drawn character into the TextPage. The glyph origin is not needed
// for text harvesting, so originX/originY are ignored.
void KPDFTextDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double /*originX*/, double /*originY*/, CharCode code, Unicode *u, int uLen)
{
    // no TextPage (it was taken and no page has been started since): drop the char
    if ( !m_text )
        return;
    m_text->addChar(state, x, y, dx, dy, code, u, uLen);
}
// END KPDFTextDev

View file

@ -28,7 +28,7 @@ class TextPage;
class KPDFPage;
/**
* @short A SplashOutputDev rendered that grab text and links.
* @short A SplashOutputDev renderer that grabs text and links.
*
* This output device:
* - renders the page using SplashOutputDev (its parent)
@ -72,4 +72,45 @@ private:
TextPage * m_text;
};
/**
* @short Collect text into a takeable TextPage.
*
* This is the simplest OutputDev. It harvests text from currently
* rendered page and provides a method for getting the TextPage.
* Xpdf's textOutputDev can't return a textpage, unfortunately.
*/
class KPDFTextDev : public OutputDev
{
    public:
        KPDFTextDev();
        virtual ~KPDFTextDev();

        // hands the TextPage over to the caller, who is then responsible
        // for deleting it; the device keeps no reference to it
        TextPage * takeTextPage();

        /** inherited from OutputDev */
        // top left corner is (0,0)
        virtual GBool upsideDown() { return gTrue; }
        // use draw char to get text data
        virtual GBool useDrawChar() { return gTrue; }
        // use drawChar even for Type3 chars
        virtual GBool interpretType3Chars() { return gFalse; }
        // do not pass non-text to this device
        virtual GBool needNonText() { return gFalse; }

        // Start a page.
        virtual void startPage(int, GfxState *state);
        // End a page.
        virtual void endPage();

        //----- update text state
        virtual void updateFont(GfxState *state);

        //----- text drawing
        virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, Unicode *u, int uLen);

    private:
        // m_text is owned through a raw pointer and deleted in the destructor:
        // a copy of the device would delete the same TextPage twice, so
        // copying is disabled (declared here, intentionally never defined)
        KPDFTextDev( const KPDFTextDev & );
        KPDFTextDev & operator=( const KPDFTextDev & );

        // text page generated by execution (0 after takeTextPage())
        TextPage * m_text;
};
#endif

View file

@ -13,11 +13,13 @@
#include <qvaluevector.h>
#include <qmap.h>
#include <kdebug.h>
#include <klocale.h>
#include <kfinddialog.h>
#include <kmessagebox.h>
// local includes
#include "PDFDoc.h"
#include "QOutputDev.h"
//#include "TextOutputDev.h"
#include "kpdf_error.h"
#include "document.h"
@ -41,6 +43,14 @@ public:
float currentPosition;
QValueVector< KPDFPage* > pages;
// find related
QString lastSearchText;
long lastSearchOptions;
KPDFPage * lastSearchPage;
// filtering related
QString filterString;
// observers related (note: won't delete oservers)
QMap< int, KPDFDocumentObserver* > observers;
};
@ -59,6 +69,7 @@ KPDFDocument::KPDFDocument()
d->pdfdoc = 0;
d->currentPage = -1;
d->currentPosition = 0;
d->lastSearchPage = 0;
SplashColor paperColor;
paperColor.rgb8 = splashMakeRGB8( 0xff, 0xff, 0xff );
d->kpdfOutputDev = new KPDFOutputDev( paperColor );
@ -163,17 +174,22 @@ void KPDFDocument::requestPixmap( int id, uint page, int width, int height, bool
// in-place Pixmap generation for syncronous requests
if ( !kp->hasPixmap( id, width, height ) )
{
// set KPDFPage pointer to outputdevice for links/text harvesting
d->kpdfOutputDev->setParams( width, height, false );
// compute dpi used to get an image with desired width and height
double fakeDpiX = width * 72.0 / kp->width(),
fakeDpiY = height * 72.0 / kp->height();
// setup kpdf output device: text page is generated only if we are at 72dpi.
// since we can pre-generate the TextPage at the right res.. why not?
bool genTextPage = !kp->hasSearchPage() && (width == kp->width()) && (height == kp->height());
d->kpdfOutputDev->setParams( width, height, genTextPage );
d->docLock.lock();
d->pdfdoc->displayPage( d->kpdfOutputDev, page + 1, fakeDpiX, fakeDpiY, 0, true, true );
d->pdfdoc->displayPage( d->kpdfOutputDev, page + 1, fakeDpiX, fakeDpiY, 0, true, false/*dolinks*/ );
d->docLock.unlock();
kp->setPixmap( id, d->kpdfOutputDev->takePixmap() );
if ( genTextPage )
kp->setSearchPage( d->kpdfOutputDev->takeTextPage() );
d->observers[id]->notifyPixmapChanged( page );
}
@ -200,101 +216,80 @@ void KPDFDocument::slotSetCurrentPagePosition( int page, float position )
pageChanged();
}
void KPDFDocument::slotFind( bool /*nextMatch*/, const QString & /*text*/ )
// Remember the filter pattern in the private data and, only when the pattern
// is longer than 3 characters, call sendFilteredPageList().
// NOTE(review): shorter patterns (an empty one included) are stored without
// calling sendFilteredPageList() - confirm that clearing the filter is
// handled somewhere else.
void KPDFDocument::slotSetFilter( const QString & pattern )
{
/*
TextOutputDev *textOut;
Unicode *u;
bool found;
double xMin1, yMin1, xMax1, yMax1;
int len, pg;
d->filterString = pattern;
if ( pattern.length() > 3 )
sendFilteredPageList();
}
// This is more or less copied from what xpdf does, surely can be optimized
len = s.length();
u = (Unicode *)gmalloc(len * sizeof(Unicode));
for (int i = 0; i < len; ++i) u[i] = (Unicode)(s.latin1()[i] & 0xff);
// search current
found = m_outputDev->find(u, len, next);
if (!found)
{
// search following pages
textOut = new TextOutputDev(NULL, gTrue, gFalse, gFalse);
if (!textOut->isOk())
void KPDFDocument::slotFind( const QString & t, long opt )
{
// reload last options if in 'find next' case
long options = t.isEmpty() ? d->lastSearchOptions : opt;
QString text = t.isEmpty() ? d->lastSearchText : t;
if ( !t.isEmpty() )
{
gfree(u);
delete textOut;
return;
d->lastSearchText = t;
d->lastSearchOptions = opt;
}
pg = m_currentPage + 1;
while(!found && pg <= d->pdfdoc->getNumPages())
{
m_docMutex.lock();
d->pdfdoc->displayPage(textOut, pg, 72, 72, 0, gTrue, gFalse);
m_docMutex.unlock();
found = textOut->findText(u, len, gTrue, gTrue, gFalse, gFalse, &xMin1, &yMin1, &xMax1, &yMax1);
if (!found) pg++;
}
// check enabled options (only caseSensitive support until now)
bool caseSensitive = options & KFindDialog::CaseSensitive;
if (!found && m_currentPage != 1)
{
if (KMessageBox::questionYesNo(widget(), i18n("End of document reached.\nContinue from the beginning?")) == KMessageBox::Yes)
{
// search previous pages
pg = 1;
while(!found && pg < m_currentPage)
// continue checking last SearchPage first (if it is the current page)
KPDFPage * foundPage = 0;
int currentPage = d->currentPage;
int pageCount = d->pages.count();
if ( d->lastSearchPage && (int)d->lastSearchPage->number() == currentPage )
if ( d->lastSearchPage->hasText( text, caseSensitive, false ) )
foundPage = d->lastSearchPage;
else
{
m_docMutex.lock();
d->pdfdoc->displayPage(textOut, pg, 72, 72, 0, gTrue, gFalse);
m_docMutex.unlock();
found = textOut->findText(u, len, gTrue, gTrue, gFalse, gFalse, &xMin1, &yMin1, &xMax1, &yMax1);
if (!found) pg++;
}
}
}
delete textOut;
if (found)
{
kdDebug() << "found at " << pg << endl;
goToPage(pg);
// xpdf says: can happen that we do not find the text if coalescing is bad OUCH
//FIXME Enrico: expanded "m_outputDev(the widget)->find(u, len, false);" above:
bool PageWidget::find( Unicode * u, int len, bool next )
{return false; TODO !!restore!! Enrico
bool b;
if (!next)
{
// ensure we are searching the whole page
m_xMin = 0;
m_yMin = 0;
d->lastSearchPage->hilightLastSearch( false );
currentPage++;
pageCount--;
}
b = m_outputdev(a QOut..) -> find(u, len, !next, true, next, false, &m_xMin, &m_yMin, &m_xMax, &m_yMax);
m_xMin = m_xMin / m_zoomFactor;
m_yMin = m_yMin / m_zoomFactor;
m_xMax = m_xMax / m_zoomFactor;
m_yMax = m_yMax / m_zoomFactor;
m_selection = b;
updateContents();
return b;
}
// expanded ends here
if ( !foundPage )
// loop through the whole document
for ( int i = 0; i < pageCount; i++ )
{
if ( currentPage >= pageCount )
{
if ( KMessageBox::questionYesNo(0, i18n("End of document reached.\nContinue from the beginning?")) == KMessageBox::Yes )
currentPage = 0;
else
break;
}
KPDFPage * page = d->pages[ currentPage ];
if ( !page->hasSearchPage() )
{
// build a TextPage using the lightweight KPDFTextDev generator..
KPDFTextDev td;
d->docLock.lock();
d->pdfdoc->displayPage( &td, page->number()+1, 72, 72, 0, true, false );
d->docLock.unlock();
// ..and attach it to the page
page->setSearchPage( td.takeTextPage() );
}
if ( page->hasText( text, caseSensitive, true ) )
{
foundPage = page;
break;
}
currentPage++;
}
if ( foundPage )
{
d->lastSearchPage = foundPage;
foundPage->hilightLastSearch( true );
slotSetCurrentPage( foundPage->number() );
foreachObserver( notifyPixmapChanged( foundPage->number() ) );
}
else
{
if (next) KMessageBox::information(widget(), i18n("No more matches found for '%1'.").arg(s));
else KMessageBox::information(widget(), i18n("No matches found for '%1'.").arg(s));
}
}
if (found) m_findText = s;
else m_findText = QString::null;
gfree(u);
*/
KMessageBox::information( 0, i18n("No matches found for '%1'.").arg(text) );
}
void KPDFDocument::slotGoToLink( /* QString anchor */ )
@ -328,6 +323,7 @@ void KPDFDocument::deletePages()
delete d->pages[i];
d->pages.clear();
d->currentPage = -1;
d->lastSearchPage = 0;
}
/** TO BE IMPORTED:
@ -406,5 +402,4 @@ void ThumbnailList::customEvent(QCustomEvent *e)
}
*/
#include "document.moc"

View file

@ -33,7 +33,6 @@ public:
// commands from the Document to all observers
virtual void pageSetup( const QValueList<int> & /*pages*/ ) {};
virtual void pageSetCurrent( int /*pageNumber*/, float /*position*/ ) {};
//virtual void pageSetHilight( int /*x*/, int /*y*/, int /*width*/, int /*height*/ ) {};
};
#define PAGEWIDGET_ID 1
@ -72,7 +71,8 @@ public slots:
// document commands via slots
void slotSetCurrentPage( int page );
void slotSetCurrentPagePosition( int page, float position );
void slotFind( bool nextMatch, const QString & text = "" );
void slotSetFilter( const QString & pattern );
void slotFind( const QString & text = "", long options = 0 );
void slotGoToLink( /* QString anchor */ );
signals:

View file

@ -103,6 +103,7 @@ Part::Part(QWidget *parentWidget, const char *widgetName,
// Page Traversal actions
m_gotoPage = KStdAction::gotoPage( this, SLOT( slotGoToPage() ), ac, "goto_page" );
m_gotoPage->setShortcut( "CTRL+G" );
m_prevPage = KStdAction::prior(this, SLOT(slotPreviousPage()), ac, "previous_page");
m_prevPage->setWhatsThis( i18n( "Moves to the previous page of the document" ) );
@ -166,7 +167,6 @@ bool Part::openFile()
{
bool ok = document->openFile( m_file );
m_find->setEnabled( ok );
m_findNext->setEnabled( ok );
return ok;
}
@ -257,14 +257,17 @@ void Part::slotGotoLast()
// Show a KFindDialog parented to the part's widget. When the dialog is
// accepted, enable the 'find next' action and start a document search with
// the pattern and options chosen in the dialog.
void Part::slotFind()
{
KFindDialog dlg(widget());
KFindDialog dlg( widget() );
if (dlg.exec() == QDialog::Accepted)
document->slotFind( false, dlg.pattern() );
{
m_findNext->setEnabled( true );
document->slotFind( dlg.pattern(), dlg.options() );
}
}
// Repeat the last search: calling the document's slotFind() with its default
// arguments (empty text) makes it reuse the previous text and options.
void Part::slotFindNext()
{
document->slotFind( true );
document->slotFind();
}
void Part::slotSaveFileAs()

View file

@ -23,7 +23,9 @@
// TODO think about moving rendering ...
// Build a page descriptor from its number, width, height and rotation.
// The search hilight starts disabled with a zeroed rectangle, and no
// TextPage is attached yet (m_text is 0).
KPDFPage::KPDFPage( int page, float w, float h, int r )
: m_number( page ), m_rotation( r ), m_width( w ), m_height( h ), m_text( 0 )
: m_number( page ), m_rotation( r ), m_width( w ), m_height( h ),
m_sEnabled( false ), m_sLeft( 0 ), m_sTop( 0 ), m_sRight( 0 ),
m_sBottom( 0 ), m_text( 0 )
{
}
@ -36,6 +38,27 @@ KPDFPage::~KPDFPage()
}
bool KPDFPage::hasPixmap( int id, int width, int height ) const
{
if ( !m_pixmaps.contains( id ) )
return false;
QPixmap * p = m_pixmaps[ id ];
return p ? ( p->width() == width && p->height() == height ) : false;
}
// True when a TextPage has been attached to this page (see setSearchPage).
bool KPDFPage::hasSearchPage() const
{
    return m_text ? true : false;
}
bool KPDFPage::hasLink( int mouseX, int mouseY ) const
{
//TODO this.
//Sample implementation using a small rect as 'active' link zone
return QRect( 20,20, 100,50 ).contains( mouseX, mouseY );
}
// BEGIN commands (paint / search)
void KPDFPage::drawPixmap( int id, QPainter * p, const QRect & limits, int width, int height ) const
{
QPixmap * pixmap = 0;
@ -79,34 +102,53 @@ void KPDFPage::drawPixmap( int id, QPainter * p, const QRect & limits, int width
p->drawLine( 0, 0, width, height );
p->drawLine( 0, height, width, 0 );
}
// draw selection (FIXME Enrico: move selection stuff inside PAGE!!)
/*if ( there is something to hilght )
p->setBrush(Qt::SolidPattern);
p->setPen(QPen(Qt::black, 1)); // should not be necessary bug a Qt bug makes it necessary
p->setRasterOp(Qt::NotROP);
p->drawRect(qRound(m_xMin*m_zoomFactor), qRound(m_yMin*m_zoomFactor), qRound((m_xMax- m_xMin)*m_zoomFactor), qRound((m_yMax- m_yMin)*m_zoomFactor));
*/
// draw selection
if ( m_sEnabled )
{
int x = (int)( m_sLeft * width / m_width ),
y = (int)( m_sTop * height / m_height ),
w = (int)( m_sRight * width / m_width ) - x,
h = (int)( m_sBottom * height / m_height ) - y;
if ( w > 0 && h > 0 )
{
p->setBrush( Qt::SolidPattern );
p->setPen( QPen( Qt::black, 1 ) ); // should not be necessary bug a Qt bug makes it necessary
p->setRasterOp( Qt::NotROP );
p->drawRect( x, y, w, h );
}
}
}
// else draw a blank area
else
p->fillRect( limits, Qt::white /*FIXME change to the page bg color*/ );
}
bool KPDFPage::hasPixmap( int id, int width, int height ) const
bool KPDFPage::hasText( const QString & text, bool strictCase, bool fromTop )
{
if ( !m_pixmaps.contains( id ) )
if ( !m_text )
return false;
QPixmap * p = m_pixmaps[ id ];
return p ? ( p->width() == width && p->height() == height ) : false;
const char * str = text.latin1();
int len = text.length();
Unicode *u = (Unicode *)gmalloc(len * sizeof(Unicode));
for (int i = 0; i < len; ++i)
u[i] = (Unicode) str[i];
bool found = m_text->findText( u, len, fromTop ? gTrue : gFalse, gTrue, fromTop ? gFalse : gTrue, gFalse, &m_sLeft, &m_sTop, &m_sRight, &m_sBottom );
if( found && strictCase )
{
GString * orig = m_text->getText( m_sLeft, m_sTop, m_sRight, m_sBottom );
found = !strcmp( text.latin1(), orig->getCString() );
}
return found;
}
bool KPDFPage::hasLink( int mouseX, int mouseY ) const
// Switch the search hilight flag (m_sEnabled) on or off; drawPixmap() reads
// that flag to decide whether to paint the selection rectangle.
void KPDFPage::hilightLastSearch( bool on )
{
//TODO this.
//Sample implementation using a small rect as 'active' link zone
return QRect( 20,20, 100,50 ).contains( mouseX, mouseY );
m_sEnabled = on;
}
// END commands (paint / search)
void KPDFPage::setPixmap( int id, QPixmap * pixmap )
{
@ -115,38 +157,18 @@ void KPDFPage::setPixmap( int id, QPixmap * pixmap )
m_pixmaps[id] = pixmap;
}
// Attach a TextPage to this page, taking ownership of it. Any previously
// attached TextPage is deleted; passing 0 simply detaches the current one.
void KPDFPage::setSearchPage( TextPage * tp )
{
    // re-attaching the page we already own must not delete it, or m_text
    // would be left pointing at freed memory
    if ( tp == m_text )
        return;
    delete m_text;
    m_text = tp;
}
/*
void KPDFPage::setPixmapOverlaySelection( const QRect & normalizedRect );
void KPDFPage::setLinks( ..SomeStruct.. )
{ //TODO this
}
void KPDFPage::setPixmapOverlayNotations( ..DOMdescription.. )
{ //TODO this
}
*/
/*
void KPDFPage::setTextPage( TextOutputDev * textPage )
{
delete m_text;
m_text = 0;
if ( m_text )
m_text = textPage;
}
void KPDFPage::setLinks( ..SomeStruct.. )
{
}
*/
/*bool KPDFPage::hasText( QString & text )
{ //TODO this
return text.isNull();
// FIXME MOVED from the QOutputDev. Find over a textpage.
//bool find(Unicode *s, int len, GBool startAtTop, GBool stopAtBottom, GBool startAtLast, GBool stopAtLast, double *xMin, double *yMin, double *xMax, double *yMax)
//{return m_text -> findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, xMin, yMin, xMax, yMax);}
}
const QRect & KPDFPage::textPosition()
{ //TODO this
return QRect();
}*/

View file

@ -14,7 +14,7 @@ class QPainter;
class QPixmap;
//class QString;
//class QRect;
class TextOutputDev;
class TextPage;
/**
* @short Collector for all the data belonging to a page.
@ -40,26 +40,28 @@ public:
float ratio() const { return m_height / m_width; }
float rotation() const { return m_rotation; }
bool hasPixmap( int id, int width, int height ) const;
bool hasSearchPage() const;
bool hasLink( int mouseX, int mouseY ) const;
// commands
void drawPixmap( int id, QPainter * p, const QRect & rect, int width, int height ) const;
bool hasText( const QString & text, bool strictCase, bool fromTop );
void hilightLastSearch( bool enabled );
// page contents setup *NOTE changes in progress*
// set page contents
void setPixmap( int id, QPixmap * pixmap );
/*void setTextPage( TextOutputDev * );*/
void setSearchPage( TextPage * text );
/*void setLinks( ..SomeStruct.. ); or (better): */
/*void setPixmapOverlaySelection( const QRect & normalizedRect );*/
/*void setPixmapOverlayNotations( ..DOMdescription.. );*/
// FIND command
//bool hasText( QString & text );
//const QRect & textPosition();
private:
int m_number, m_rotation;
float m_width, m_height;
bool m_sEnabled;
double m_sLeft, m_sTop, m_sRight, m_sBottom;
QMap<int,QPixmap *> m_pixmaps;
TextOutputDev * m_text;
TextPage * m_text;
};
/*
class KPDFLink