// Source listing from the kbib package.
//
// File: ieeexploresearcher.cpp

//
// C++ Implementation: ieeexploresearcher
//
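// Description: Searcher back end for IEEE Xplore. It sends a query to the
// search page over HTTP, reads the number of matches from the reply, then
// fetches a requested range of results and converts each one to a BibEntry.
//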
//
// Author: Thach Nguyen <thach.nguyen@rmit.edu.au>, (C) 2006
//
// Copyright: See COPYING file that comes with this distribution
//
//
#include "ieeexploresearcher.h"
#include "searchmanager.h"
#include "bibfile.h"
#include "filters/bibprogs.h"

#include <klocale.h>
#include <kdialog.h>
#include <kconfig.h>
#include <kstandarddirs.h>
#include <klineedit.h>
#include <kaccelmanager.h>

#include <qlabel.h>
#include <qlayout.h>
#include <qhbox.h>
#include <qfile.h>
#include <qwhatsthis.h>
#include <qregexp.h>


//static const char*  IEEEXPLORE_BASE_URL = "http://ieeexplore.ieee.org.ezproxy.lib.rmit.edu.au";
//static const char*  IEEEXPLORE_BASE_URL = "http://ieeexplore.ieee.org";
//static const char*  IEEEXPLORE_SEARCH_PART = "search/searchresult.jsp";


// Creates a searcher that points at the public IEEE Xplore server.
//
// The host and the free-search flag assigned here are only defaults:
// readConfig() and IEEEXploreConfigWidget::updateSearcher() overwrite them.
//
// parent, name: forwarded unchanged to the searcher base class.
IEEEXploreSearcher::IEEEXploreSearcher(QObject *parent, const char *name)
        : searcher(parent, name),m_step(Begin), m_started(false)
{
    // No transfer is running yet. stop() tests m_job before killing it, and
    // search() can reach stop() before any job has been created, so the
    // pointer must never be left in an indeterminate state.
    m_job = 0;

    m_host = QString::fromLatin1("http://ieeexplore.ieee.org");
    m_freeSearch = false;
}


// Destructor. The body is intentionally empty: nothing is released here
// explicitly.
IEEEXploreSearcher::~IEEEXploreSearcher()
{
}

// Returns the name used for this source when none has been configured.
QString IEEEXploreSearcher::defaultName()
{
    const QString fallbackName = QString::fromLatin1("IEEEXplore");
    return fallbackName;
}

// Returns the configured source name, or defaultName() when m_name is empty.
QString IEEEXploreSearcher::source() const
{
    if (m_name.isEmpty())
    {
        return defaultName();
    }
    return m_name;
}

// Loads name, host and free-search flag from group `group_` of `config_`.
//
// A missing "Name" entry falls back to defaultName(); an entry that is present
// but empty leaves m_name untouched. "Host" defaults to the public IEEE Xplore
// address and "Free Search" defaults to false.
void IEEEXploreSearcher::readConfig(KConfig* config_, const QString& group_)
{
    // Scoped group selection on config_ for the reads below.
    KConfigGroupSaver groupSaver(config_, group_);

    const QString configuredName = config_->readEntry("Name", defaultName()); // default to IEEEXplore
    if (!configuredName.isEmpty())
        m_name = configuredName;

    const QString publicHost = QString::fromLatin1("http://ieeexplore.ieee.org");
    m_host = config_->readEntry("Host", publicHost);

    m_freeSearch = config_->readBoolEntry("Free Search", false);
}


// Writes the same three entries that readConfig() reads.
//
// No group is selected here; the entries land in whichever group `config`
// currently has active.
void IEEEXploreSearcher::saveConfig(KConfig* config)
{
    config->writeEntry("Name",        m_name);
    config->writeEntry("Host",        m_host);
    config->writeEntry("Free Search", m_freeSearch);
}

// Builds the query text from up to three (key, value) pairs and starts the
// first HTTP request, which asks for a single result only.
//
// key1..key3     field each value is searched in. Title, Author, Keyword, All
//                and Journal become "<value><in><code>"; Year becomes a
//                "(pyr >= v<and> pyr <= v)" range; any other key aborts.
// value1..value3 search terms; an empty value makes its slot be skipped.
// operator1/2    connective placed in front of the 2nd / 3rd term when the
//                query is not empty yet: 0 = <and>, 1 = <or>, 2 = <not>;
//                any other number aborts.
//
// Aborting means calling stop(). On success m_step becomes Search and the
// reply is delivered through slotData() / slotComplete().
void IEEEXploreSearcher::search(SearchKey key1, SearchKey key2, SearchKey key3 , const QString& value1, const QString& value2, const QString& value3, int operator1, int operator2)
{
    cerr << "Searching IEEEXplore\n";
    m_started = true;

    m_data.truncate(0);

    // Free search and full search live on different pages of the same host.
    m_url = KURL(m_host);
    const char *searchPage = m_freeSearch ? "search/freesearchresult.jsp"
                                          : "search/searchresult.jsp";
    m_url.addPath(QString::fromLatin1(searchPage));

    // The three slots are handled identically; only the first has no
    // connective in front of it (index 0 of `connectives` is never read).
    const SearchKey fields[3] = { key1, key2, key3 };
    const QString *const terms[3] = { &value1, &value2, &value3 };
    const int connectives[3] = { 0, operator1, operator2 };

    m_query = QString();

    for (int slot = 0; slot < 3; ++slot)
    {
        const QString &term = *terms[slot];
        if (term.isEmpty())
            continue;

        if (slot > 0 && !m_query.isEmpty())
        {
            const int connective = connectives[slot];
            if (connective == 0)
                m_query += QString::fromLatin1("<and>");
            else if (connective == 1)
                m_query += QString::fromLatin1("<or>");
            else if (connective == 2)
                m_query += QString::fromLatin1("<not>");
            else
            {
                stop();
                return;
            }
        }

        // Field code for the "<in>" form; stays null for Year, which uses a
        // range expression instead.
        const char *fieldCode = 0;
        switch (fields[slot])
        {
        case Title:
            fieldCode = "ti";
            break;
        case Author:
            fieldCode = "au";
            break;
        case Keyword:
            fieldCode = "de";
            break;
        case All:
            fieldCode = "metadata";
            break;
        case Journal:
            fieldCode = "jn";
            break;
        case Year:
            break;
        default:
            stop();
            return;
        }

        if (fieldCode)
        {
            m_query += term + QString::fromLatin1("<in>") + QString::fromLatin1(fieldCode);
        }
        else
        {
            m_query += QString::fromLatin1("(pyr >= ") + term
                       + QString::fromLatin1("<and> pyr <= ") + term
                       + QString::fromLatin1(")");
        }
    }

    if (m_query.isEmpty())
    {
        stop();
        return;
    }

    m_url.addQueryItem(QString::fromLatin1("queryText"), m_query);
    m_url.addQueryItem(QString::fromLatin1("ResultCount"), "1");
    m_url.addQueryItem(QString::fromLatin1("ResultStart"), "0");

    m_step = Search;
    std::cerr << m_url.url() << "\n";

    m_job = KIO::get(m_url, false, false);
    connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
            SLOT(slotData(KIO::Job*, const QByteArray&)));
    connect(m_job, SIGNAL(result(KIO::Job*)),
            SLOT(slotComplete(KIO::Job*)));
}

// Ends the current search, if one is running: kills a pending job, clears the
// receive buffer, resets the step to Begin and emits signalDone(this).
// Does nothing when no search has been started.
void IEEEXploreSearcher::stop()
{
    if (m_started)
    {
        if (m_job)
        {
            m_job->kill();
            m_job = 0;
        }

        m_started = false;
        m_data.truncate(0);
        m_step = Begin;

        emit signalDone(this);
    }
}

// Receives one chunk of the reply and appends its bytes to m_data.
void IEEEXploreSearcher::slotData(KIO::Job*, const QByteArray& data_)
{
    QDataStream appender(m_data, IO_WriteOnly | IO_Append);
    const char *chunk = data_.data();
    appender.writeRawBytes(chunk, data_.size());
}

// Called when the transfer has finished. Reports a job error or an empty
// reply by stopping the search; otherwise hands the collected data to the
// parser that belongs to the current step.
void IEEEXploreSearcher::slotComplete(KIO::Job* job_)
{
    // The fetch is over, so the job pointer is no longer needed.
    m_job = 0;

    if (job_->error())
    {
        emit signalMessage(job_->errorString(), 0);
        stop();
        return;
    }

    if (m_data.isEmpty())
    {
        std::cerr << "IEEEXploreSearcher::slotComplete() - no data\n";
        stop();
        return;
    }

    if (m_step == Search)
    {
        searchResults();
    }
    else if (m_step == Fetch)
    {
        fetchResults();
    }
    else
    {
        std::cerr << "IEEEXploreSearcher::slotComplete() - wrong step = " << m_step << "\n";
    }
}

// Parses the reply to the first request and extracts the number of matches.
//
// With at least one match the searcher moves to the Wait step and announces
// the count through signalQueryResult(); the caller is then expected to call
// retrieveRange(). With no match, or when the reply does not contain the
// "Your search matched" banner at all, "No reference was found" is reported
// and the search is stopped.
void IEEEXploreSearcher::searchResults()
{
    QString str = QString::fromUtf8(m_data, m_data.size());

    QRegExp rx(QString::fromLatin1(".*Your search matched <strong>(\\d+)</strong>.*"));

    // A reply without the banner counts as zero matches. Returning right away
    // in that case matters: falling through would set m_step to Wait after
    // stop() has already reset the searcher, leaving it in a state in which
    // retrieveRange() would still run with a stale query.
    m_total = rx.exactMatch(str) ? rx.cap(1).toInt() : 0;

    if (m_total > 0)
    {
        m_waitingRetrieveRange = true;
        m_step = Wait;
        emit signalQueryResult(m_total);
        return;
    }

    emit signalMessage(i18n("No reference was found"), 1);
    stop();
}


void IEEEXploreSearcher::retrieveRange(unsigned int min, unsigned int max)
{
    if (m_step != Wait)
            return;
            
      m_waitingRetrieveRange = false;
    if (min < 1 && max < 1)
    {
        stop();
        return;
    }
    m_start = min;
    m_end = max;

    m_url = KURL(m_host);
    if (m_freeSearch)
        m_url.addPath(QString::fromLatin1("search/freesearchresult.jsp"));
    else
        m_url.addPath(QString::fromLatin1("search/searchresult.jsp"));

    m_url.addQueryItem(QString::fromLatin1("queryText"), m_query);
    m_url.addQueryItem(QString::fromLatin1("ResultCount"), QString::number(m_end-m_start+1));
    m_url.addQueryItem(QString::fromLatin1("ResultStart"), QString::number(m_start-1));

    m_data.truncate(0);
    m_step = Fetch;
    std::cerr << m_url.url() << "\n";
    m_job = KIO::get(m_url, false, false);
    connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
            SLOT(slotData(KIO::Job*, const QByteArray&)));
    connect(m_job, SIGNAL(result(KIO::Job*)),
            SLOT(slotComplete(KIO::Job*)));
    cerr << "start end " << m_start << " " << m_end << "\n";
}


void IEEEXploreSearcher::fetchResults()
{
    QString str = QString::fromUtf8(m_data, m_data.size());

            //Testing
      /*          
          cerr << "Fetching...\n";
      QFile f(QString::fromLatin1("/home/s9510300/tmp/test_ieee2.html"));
      QString str;
      if(f.open(IO_ReadOnly)) {
            QTextStream stream( &f );
            str = stream.read(); 
            f.close();
      }
      //////////////
      */    
    for (int i = m_start; i <= m_end; i++)
    {
        QRegExp rx1(QString::fromLatin1("<div align=\"left\">.*<strong>%1").arg(i).append(QString::fromLatin1(".</strong>")));
        int startIndex = rx1.search(str) + rx1.matchedLength();
        QRegExp rx2(QString::fromLatin1("</table></td>") );
        int endIndex = rx2.search(str, startIndex);
        QString s = str.mid(startIndex, endIndex-startIndex);

        BibEntry *entry;
        QString publication;
            if (s.find("IEEE JNL") >= 0 || s.find("IEE JNL") >= 0)
        {   //A journal
            entry = new BibEntry("article", "");
            publication = QString::fromLatin1("journal");
        }
        else
        {
            entry = new BibEntry("inproceedings", "");
            publication = QString::fromLatin1("booktitle");
        }
        QRegExp entryRx1(QString::fromLatin1(".*<strong>(.+)</strong><br>\\s+(.+)<br>\\s+<A href='(.+)'>(.+)</A><br>\\s+Volume (.+),&nbsp;\\s*(\\d\\d\\d\\d)\\s+Page\\(s\\):.*" ) );
        QRegExp entryRx2(QString::fromLatin1(".*<strong>(.+)</strong><br>\\s+(.+)<br>\\s+<A href='(.+)'>(.+)</A><br>\\s+Volume (.+),&nbsp;\\s+Issue (\\d+),&nbsp;\\s*(.+) .*(\\d\\d\\d\\d)\\s+Page\\(s\\):.*" ) );
        QRegExp entryRx3(QString::fromLatin1(".*<strong>(.+)</strong><br>\\s+(.+)<br>\\s+<A href='(.+)'>(.+)</A><br>\\s+Volume (.+),&nbsp;\\s+Issue (\\d+),&nbsp;\\s+Part (\\d+),&nbsp;\\s*(.+)? (\\d\\d\\d\\d)\\s+Page\\(s\\):.*" ) );
            QRegExp entryRx4(QString::fromLatin1(".*<strong>(.+)</strong><br>\\s+(.+)<br>\\s+<A href='(.+)'>(.+)</A><br>\\sVolume\\s+(.+),&nbsp;\\s+(.+)(\\d\\d\\d\\d)\\s+Page\\(s\\):.*") );
            
            QRegExp entryRx5(QString::fromLatin1(".*<strong>(.+)</strong><br>\\s+(.+)<br>\\s+<A href='(.+)'>(.+)</A><br>\\s+.+\\s+(.+)\\s+(\\d\\d\\d\\d)\\s+Page\\(s\\):.*") );
        s = s.simplifyWhiteSpace();
        //        std::cerr << "Sub string = " << s.ascii() << "---------------\n";
        QString text;
        if (entryRx1.exactMatch(s))
        {
                  text = entryRx1.cap(1);
            entry->setField("title", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("title", false);

            text = entryRx1.cap(2);
            text = text.replace(QString::fromLatin1("; "), QString::fromLatin1(" and "));
            text = text.replace(QString::fromLatin1(";"), QString::fromLatin1(""));
            entry->setField("author", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("author", false);


            text = entryRx1.cap(4);
            entry->setField(const_cast<char*>(publication.ascii()), const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator(publication.ascii(), false);

            text = entryRx1.cap(5);
            entry->setField("volume", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("volume", false);

            text = entryRx1.cap(6);
            entry->setField("year", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("year", false);
        }
        else if (entryRx2.exactMatch(s))
        {
                  text = entryRx2.cap(1);
            entry->setField("title", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("title", false);

            text = entryRx2.cap(2);
            text = text.replace(QString::fromLatin1("; "), QString::fromLatin1(" and "));
            text = text.replace(QString::fromLatin1(";"), QString::fromLatin1(""));
            entry->setField("author", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("author", false);

            text = entryRx2.cap(4);
            entry->setField(const_cast<char*>(publication.ascii()), const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator(publication.ascii(), false);

            text = entryRx2.cap(5);
            entry->setField("volume", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("volume", false);

            text = entryRx2.cap(6);
            entry->setField("number", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("issue", false);

            text = entryRx2.cap(7);
            entry->setField("month", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("month", false);

            text = entryRx2.cap(8);
            entry->setField("year", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("year", false);
        }
        else if(entryRx3.exactMatch(s))
        {
                  text = entryRx3.cap(1);
            entry->setField("title", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("title", false);


            text = entryRx3.cap(2);
            text = text.replace(QString::fromLatin1("; "), QString::fromLatin1(" and "));
            text = text.replace(QString::fromLatin1(";"), QString::fromLatin1(""));
            entry->setField("author", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("author", false);

            text = entryRx3.cap(4);
            entry->setField(const_cast<char*>(publication.ascii()), const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator(publication.ascii(), false);

            text = entryRx3.cap(5);
            entry->setField("volume", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("volume", false);

            text = entryRx3.cap(6);
            entry->setField("number", const_cast<char*>(text.ascii()));

            text = entryRx3.cap(8);
            entry->setField("month", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("month", false);

            text = entryRx3.cap(9);
            entry->setField("year", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("year", false);
        }
            
            else if (entryRx4.exactMatch(s))
            {
                  text = entryRx4.cap(1);
                  entry->setField("title", const_cast<char*>(text.ascii()));
                  entry->setStringMacroIndicator("title", false);

                  text = entryRx4.cap(2);
                  text = text.replace(QString::fromLatin1("; "), QString::fromLatin1(" and "));
                  text = text.replace(QString::fromLatin1(";"), QString::fromLatin1(""));
                  entry->setField("author", const_cast<char*>(text.ascii()) );
                  entry->setStringMacroIndicator("author", false);

                  text = entryRx4.cap(4);
                  entry->setField(const_cast<char*>(publication.ascii()), const_cast<char*>(text.ascii()));
                  entry->setStringMacroIndicator(publication.ascii(), false);

                  text = entryRx4.cap(5);
                  entry->setField("volume", const_cast<char*>(text.ascii()));
                  entry->setStringMacroIndicator("volume", false);
                  
                  text = entryRx4.cap(6);
                  entry->setField("month", const_cast<char*>(text.ascii()));
                  entry->setStringMacroIndicator("month", false);

                  text = entryRx4.cap(7);
                  entry->setField("year", const_cast<char*>(text.ascii()));
                  entry->setStringMacroIndicator("year", false);
            }
            
        else if (entryRx5.exactMatch(s))
        {
                  text = entryRx5.cap(1);
            entry->setField("title", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("title", false);

            text = entryRx5.cap(2);
            text = text.replace(QString::fromLatin1("; "), QString::fromLatin1(" and "));
            text = text.replace(QString::fromLatin1(";"), QString::fromLatin1(""));
            entry->setField("author", const_cast<char*>(text.ascii()) );
            entry->setStringMacroIndicator("author", false);

            text = entryRx5.cap(4);
            entry->setField(const_cast<char*>(publication.ascii()), const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator(publication.ascii(), false);

            text = entryRx5.cap(5);
            entry->setField("month", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("month", false);

            text = entryRx5.cap(6);
            entry->setField("year", const_cast<char*>(text.ascii()));
            entry->setStringMacroIndicator("year", false);
        }
        //get pages

        startIndex = s.find(QString::fromLatin1("Page(s):") );
        if (startIndex > 0)
        {
            endIndex = s.find(QString::fromLatin1("<br>"), startIndex);
            text = (s.mid(startIndex+8, endIndex-startIndex-8)).stripWhiteSpace();

            if (!text.isEmpty())
            {
                entry->setField("pages", const_cast<char*>(text.ascii()));
                entry->setStringMacroIndicator("pages", false);
            }
        }

        //get DOI
        startIndex = s.find(QString::fromLatin1("Digital Object Identifier"));
        if (startIndex > 0)
        {
            endIndex = s.find(QString::fromLatin1("<br>"), startIndex) ;
            text = (s.mid(startIndex+25, endIndex-startIndex-25)).stripWhiteSpace();
            if (!text.isEmpty())
            {
                entry->setField("doi", const_cast<char*>(text.ascii()));
                entry->setStringMacroIndicator("doi", false);
            }
        }

        //get URL
        QRegExp urlRx(QString::fromLatin1(".*<a href=\"/search/srchabstract\\.jsp\\?(.+)\" class=\"(bodyCopy|bodyCopySpaced)\">Abstract.*") );
        if (urlRx.exactMatch(s))
        {
            text = urlRx.cap(1);
            if (!text.isEmpty())
            {
                text = m_host+(QString::fromLatin1("/search/srchabstract.jsp?") ) + text;
                entry->setField("url", const_cast<char*>(text.ascii()));
                entry->setStringMacroIndicator("url", false);
            }
        }

        //get PDF
        QRegExp pdfRx(QString::fromLatin1(".*Full Text: <A href=\"(.+)\" class=\"(bodyCopy|bodyCopySpaced)\">PDF</A>.*") );
        if (pdfRx.exactMatch(s))
        {
            text = pdfRx.cap(1);
            if (!text.isEmpty())
            {
                text = m_host+text;
                entry->setField("file", const_cast<char*>(text.ascii()));
                entry->setStringMacroIndicator("file", false);
            }
        }

        if (entry)
        {
            entry->createKey("");
            emit signalResultFound(new BibEntry(*entry));
            delete entry;

        }
    }
    stop();

}


// Sets the name that source() reports for this searcher.
void IEEEXploreSearcher::setSource(const QString s)
{
    m_name = s;
}


// Returns the display strings of the search keys this source offers:
// only "All" in free-search mode, otherwise All, Author, Title, Journal,
// Keyword and Year, in that order.
QStringList IEEEXploreSearcher::searchKey()
{
    QStringList keyList;

    keyList << searchManager::self()->searchKeyString(All);
    if (m_freeSearch)
        return keyList;

    keyList << searchManager::self()->searchKeyString(Author);
    keyList << searchManager::self()->searchKeyString(Title);
    keyList << searchManager::self()->searchKeyString(Journal);
    keyList << searchManager::self()->searchKeyString(Keyword);
    keyList << searchManager::self()->searchKeyString(Year);
    return keyList;
}


// Returns how many search terms this source accepts: 1 in free-search mode,
// 100 otherwise.
int   IEEEXploreSearcher::maxSearchTerms()
{
    return m_freeSearch ? 1 : 100;
}

// Creates the configuration widget for this searcher as a child of `parent_`.
SearcherConfigWidget* IEEEXploreSearcher::configWidget(QWidget* parent_)
{
    SearcherConfigWidget* widget = new IEEEXploreConfigWidget(parent_, this);
    return widget;
}

// Builds the options page for an IEEE Xplore source: a "Host:" line edit and
// a "Use free search" check box.
//
// parent_    parent widget, forwarded to SearcherConfigWidget.
// searcher_  searcher whose current settings are shown; may be null, in which
//            case the public host and "free search off" are shown instead.
IEEEXploreConfigWidget::IEEEXploreConfigWidget(QWidget* parent_, IEEEXploreSearcher* searcher_/*=0*/)
        : SearcherConfigWidget(parent_)
{
    m_searcher = searcher_;
    QVBoxLayout* l = new QVBoxLayout(optionsWidget());
    l->setMargin(KDialog::marginHint());
    l->setSpacing(KDialog::spacingHint());

    // Host row: label and line edit side by side.
    QHBox *box1 = new QHBox(optionsWidget());
    //      box1->setMargin(KDialog::marginHint());
    box1->setSpacing(KDialog::spacingHint());
    QLabel *hostLabel = new QLabel(i18n("Host:"), box1);
    m_hostEdit = new KLineEdit(box1);
    // The help text names the same default host that is used everywhere else
    // in this file.
    QString w = i18n("Enter the host name of the server. By default it is http://ieeexplore.ieee.org. If your institution uses a different server address, enter it here.");
    QWhatsThis::add(hostLabel, w);
    QWhatsThis::add(m_hostEdit, w);
    hostLabel->setBuddy(m_hostEdit);
    l->addWidget(box1);

    m_freeSearchSelect = new QCheckBox(i18n("Use free search"), optionsWidget());
    w = i18n("Select to use free search service from IEEEXplore. With free search you can only use single search term and search in all fields.");
    QWhatsThis::add(m_freeSearchSelect, w);
    l->addWidget(m_freeSearchSelect);

    //l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
    l->addStretch();

    if(searcher_)
    {
        m_hostEdit->setText(searcher_->m_host);
        m_freeSearchSelect->setChecked(searcher_->m_freeSearch);
    }
    else
    {
        m_hostEdit->setText(QString::fromLatin1("http://ieeexplore.ieee.org"));
        m_freeSearchSelect->setChecked(false);
    }


    KAcceleratorManager::manage(optionsWidget());
}

// Copies the values from the widgets back into the searcher this page was
// created for. The host is stored without surrounding white space.
void IEEEXploreConfigWidget::updateSearcher()
{
    IEEEXploreSearcher *s = static_cast<IEEEXploreSearcher*>(m_searcher);
    if (!s)
    {
        // The constructor accepts a null searcher, so there may be nothing
        // to update.
        return;
    }
    s->m_host = m_hostEdit->text().stripWhiteSpace();
    s->m_freeSearch = m_freeSearchSelect->isChecked();
}


#include "ieeexploresearcher.moc"

// Listing generated by Doxygen 1.6.0.