/***************************************************************************
 *   SPDX-License-Identifier: GPL-2.0-or-later
 *                                                                         *
 *   SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de>
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
 ***************************************************************************/

#include "onlinesearchpubmed.h"

#include <QNetworkReply>
#include <QDateTime>
#include <QXmlStreamReader>
#include <QRegularExpression>

#ifdef HAVE_KF
#include <KLocalizedString>
#include <KMessageBox>
#else // HAVE_KF
#define i18n(text) QObject::tr(text)
#endif // HAVE_KF

#include <EncoderXML>
#include <FileImporterBibTeX>
#include "internalnetworkaccessmanager.h"
#include "logging_networking.h"

/// Time (in seconds since the epoch) at which the most recent network reply
/// was handled by eSearchDone() or eFetchDone(). startSearch() compares it
/// against the current time to throttle queries. Starts at 0, i.e. no
/// throttling before the first reply.
qint64 OnlineSearchPubMed::lastQueryEpoch = 0;

/**
 * Private helper of OnlineSearchPubMed. It knows how to build the URLs
 * for the two requests issued per search (an id search followed by a
 * fetch of the full records) and how to turn the fetched XML data into
 * bibliographic entries.
 */
class OnlineSearchPubMed::OnlineSearchPubMedPrivate
{
private:
    /// Common prefix of all URLs built by this class
    const QString pubMedUrlPrefix;

public:
    /// Upper limit for the number of results requested per search
    static const int maxNumResults;
    /// Minimum number of seconds between two searches
    static const qint64 queryChokeTimeout;

    OnlineSearchPubMedPrivate(OnlineSearchPubMed *)
            : pubMedUrlPrefix(QStringLiteral("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"))
    {
        // nothing
    }

    /**
     * Build the URL for the initial id search.
     *
     * The words of the free text, year, title, and author fields of
     * @p query are collected, tagged with the matching PubMed field
     * name where applicable, and joined with AND operators.
     * Free text words that look like a PMID (six or more digits) are
     * passed on without a field tag.
     *
     * Every word is percent-encoded before it is inserted into the URL.
     * Otherwise characters with a special meaning inside a URL's query
     * (such as '&', '#', '+', or '%') typed by the user would cut off or
     * corrupt the 'term' parameter. Quotation marks are covered by this
     * encoding as well (they become %22).
     *
     * @param query search terms as entered by the user
     * @param numResults value for the 'retmax' parameter
     * @return URL to request
     */
    QUrl buildQueryUrl(const QMap<QueryKey, QString> &query, int numResults) {
        /// used to auto-detect PMIDs (unique identifiers for documents) in free text search
        static const QRegularExpression pmidRegExp(QStringLiteral("^[0-9]{6,}$"));

        /// percent-encode a single user-provided word so that it is safe inside the URL's query
        const auto encoded = [](const QString &word) {
            return QString::fromLatin1(QUrl::toPercentEncoding(word));
        };

        QString url = pubMedUrlPrefix + QStringLiteral("esearch.fcgi?db=pubmed&tool=kbibtex&term=");

        const QStringList freeTextWords = OnlineSearchAbstract::splitRespectingQuotationMarks(query[QueryKey::FreeText]);
        const QStringList yearWords = OnlineSearchAbstract::splitRespectingQuotationMarks(query[QueryKey::Year]);
        const QStringList titleWords = OnlineSearchAbstract::splitRespectingQuotationMarks(query[QueryKey::Title]);
        const QStringList authorWords = OnlineSearchAbstract::splitRespectingQuotationMarks(query[QueryKey::Author]);

        /// append search terms
        QStringList queryFragments;
        queryFragments.reserve(freeTextWords.size() + yearWords.size() + titleWords.size() + authorWords.size());

        /// add words from "free text" field, but auto-detect PMIDs
        /// (the PMID test is applied to the word as typed, before encoding)
        for (const QString &text : freeTextWords)
            queryFragments.append(encoded(text) + (pmidRegExp.match(text).hasMatch() ? QString() : QStringLiteral("[All Fields]")));

        /// add words from "year" field
        for (const QString &text : yearWords)
            queryFragments.append(encoded(text));

        /// add words from "title" field
        for (const QString &text : titleWords)
            queryFragments.append(encoded(text) + QStringLiteral("[Title]"));

        /// add words from "author" field
        for (const QString &text : authorWords)
            queryFragments.append(encoded(text) + QStringLiteral("[Author]"));

        /// Join all search terms with an AND operation
        url.append(queryFragments.join(QStringLiteral("+AND+")));

        /// set number of expected results
        url.append(QString(QStringLiteral("&retstart=0&retmax=%1&retmode=xml")).arg(numResults));

        return QUrl::fromUserInput(url);
    }

    /**
     * Build the URL for fetching the full records of the given ids.
     *
     * @param idList ids to fetch; inserted as a comma-separated list
     * @return URL to request
     */
    QUrl buildFetchIdUrl(const QStringList &idList) {
        const QString urlText = pubMedUrlPrefix + QStringLiteral("efetch.fcgi?retmode=xml&db=pubmed&id=") + idList.join(QStringLiteral(","));
        return QUrl::fromUserInput(urlText);
    }

    /**
     * Convert XML data into a list of entries.
     *
     * The actual parsing code is textually included from a generated
     * file and is not visible here; presumably it fills @c result and
     * reports success through @p ok (to be confirmed against the
     * generated file). The local names @c xmlData, @c ok, and @c result
     * must therefore not be renamed.
     *
     * @param xmlData raw XML data to parse
     * @param ok optional flag handed to the generated parser code
     * @return entries collected in @c result
     */
    QVector<QSharedPointer<Entry>> parsePubMedXML(const QByteArray &xmlData, bool *ok = nullptr) {
        QVector<QSharedPointer<Entry>> result;

        // Source code generated by Python script 'onlinesearch-parser-generator.py'
        // using information from configuration file 'onlinesearchpubmed-parser.in.cpp'
        #include "onlinesearch/onlinesearchpubmed-parser.generated.cpp"

        return result;
    }
};

/// Hard upper limit for the number of results; startSearch() clamps the
/// requested number of results to this value.
const int OnlineSearchPubMed::OnlineSearchPubMedPrivate::maxNumResults = 25;
/// Minimum time that has to pass since the last handled reply before
/// startSearch() will issue a new query.
const qint64 OnlineSearchPubMed::OnlineSearchPubMedPrivate::queryChokeTimeout = 10; /// 10 seconds


/**
 * Create a new PubMed search object together with its private helper.
 *
 * @param parent passed on to the base class OnlineSearchAbstract
 */
OnlineSearchPubMed::OnlineSearchPubMed(QObject *parent)
        : OnlineSearchAbstract(parent),
          d(new OnlineSearchPubMedPrivate(this))
{
    /// all work is done in the initializer list
}

/**
 * Destroy this search object and release the private helper that was
 * allocated in the constructor.
 */
OnlineSearchPubMed::~OnlineSearchPubMed()
{
    delete d;
}

void OnlineSearchPubMed::startSearch(const QMap<QueryKey, QString> &query, int numResults)
{
    m_hasBeenCanceled = false;
    Q_EMIT progress(curStep = 0, numSteps = 2);

    /// enforcing limit on number of results
    numResults = qMin(OnlineSearchPubMedPrivate::maxNumResults, numResults);
    /// enforcing choke on number of searches per time
    if (QDateTime::currentSecsSinceEpoch() - lastQueryEpoch < OnlineSearchPubMedPrivate::queryChokeTimeout) {
        qCWarning(LOG_KBIBTEX_NETWORKING) << "Too many search queries per time; choke enforces pause of" << OnlineSearchPubMedPrivate::queryChokeTimeout << "seconds between queries";
        delayedStoppedSearch(resultNoError);
        return;
    }

    QNetworkRequest request(d->buildQueryUrl(query, numResults));
    QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
    InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply);
    connect(reply, &QNetworkReply::finished, this, &OnlineSearchPubMed::eSearchDone);

    refreshBusyProperty();
}


/**
 * Name of this online search as shown to the user.
 *
 * @return the translated string "PubMed"; translated through i18n()
 *         if HAVE_KF is defined and through QObject::tr() otherwise
 */
QString OnlineSearchPubMed::label() const
{
#ifdef HAVE_KF
    return i18n("PubMed");
#else // HAVE_KF
    //= onlinesearch-pubmed-label
    return QObject::tr("PubMed");
#endif // HAVE_KF
}

/**
 * Address of the PubMed website.
 *
 * @return URL of PubMed's homepage
 */
QUrl OnlineSearchPubMed::homepage() const
{
    const QString homepageAddress = QStringLiteral("https://pubmed.ncbi.nlm.nih.gov/");
    return QUrl(homepageAddress);
}

#ifdef BUILD_TESTING
/**
 * Only available if BUILD_TESTING is defined: gives access to the
 * private class' XML parser by forwarding both arguments unchanged.
 *
 * @param xmlData raw XML data to parse
 * @param ok passed through to the private parser
 * @return whatever the private parser returns
 */
QVector<QSharedPointer<Entry>> OnlineSearchPubMed::parsePubMedXML(const QByteArray &xmlData, bool *ok)
{
    QVector<QSharedPointer<Entry>> parsedEntries = d->parsePubMedXML(xmlData, ok);
    return parsedEntries;
}
#endif // BUILD_TESTING

/**
 * Slot invoked when the reply to the id search (started in
 * startSearch()) has finished.
 *
 * Extracts all ids enclosed in <IdList>...</IdList> from the reply and,
 * if there are any, requests the full records for them; that reply is
 * handled in eFetchDone(). If no id was found, the search is stopped:
 * without error if the reply states a count of zero, with an
 * unspecified error otherwise.
 */
void OnlineSearchPubMed::eSearchDone()
{
    Q_EMIT progress(++curStep, numSteps);
    lastQueryEpoch = QDateTime::currentSecsSinceEpoch();

    QNetworkReply *reply = static_cast<QNetworkReply *>(sender());

    if (handleErrors(reply)) {
        /// Convert the complete byte array; going through a plain C string
        /// would silently stop at the first NUL byte
        const QString result = QString::fromUtf8(reply->readAll());

        /// without parsing XML text correctly, just extract all PubMed ids
        QStringList idList;
        /// All IDs are within <IdList>...</IdList>
        /// (a position of 0 is a valid match, only negative values mean 'not found')
        const auto listStart = result.indexOf(QStringLiteral("<IdList>"));
        const auto listEnd = listStart >= 0 ? result.indexOf(QStringLiteral("</IdList>"), listStart) : -1;
        if (listEnd >= 0) {
            /// Search for each <Id>...</Id> located before the end of the list
            auto idOpen = result.indexOf(QStringLiteral("<Id>"), listStart);
            while (idOpen >= 0 && idOpen < listEnd) {
                const auto idStart = idOpen + 4; ///< skip the four characters of "<Id>"
                const auto idClose = result.indexOf(QStringLiteral("</Id>"), idStart);
                if (idClose < 0 || idClose >= listEnd)
                    break;
                /// Extract ID and add it to list
                idList << result.mid(idStart, idClose - idStart);
                idOpen = result.indexOf(QStringLiteral("<Id>"), idClose);
            }
        }

        if (!idList.isEmpty()) {
            /// fetch full bibliographic details for found PubMed ids
            QNetworkRequest request(d->buildFetchIdUrl(idList));
            QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply);
            InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply);
            connect(newReply, &QNetworkReply::finished, this, &OnlineSearchPubMed::eFetchDone);
        } else if (result.contains(QStringLiteral("<Count>0</Count>"))) {
            /// search resulted in no hits (and PubMed told so)
            /// NOTE: this marker is only consulted if no id was found, as the
            /// same text may also show up for a single search term elsewhere
            /// in the reply even though the search as a whole had hits
            stopSearch(resultNoError);
        } else {
            /// neither ids nor an explicit 'no hits' statement
            stopSearch(resultUnspecifiedError);
        }
    }

    refreshBusyProperty();
}

void OnlineSearchPubMed::eFetchDone()
{
    Q_EMIT progress(++curStep, numSteps);
    lastQueryEpoch = QDateTime::currentSecsSinceEpoch();

    QNetworkReply *reply = static_cast<QNetworkReply *>(sender());

    if (handleErrors(reply)) {
        const QByteArray xmlCode = reply->readAll();

        bool ok = false;
        const QVector<QSharedPointer<Entry>> entries = d->parsePubMedXML(xmlCode, &ok);

        if (ok) {
            for (const auto &entry : entries)
                publishEntry(entry);
            stopSearch(resultNoError);
        } else {
            qCWarning(LOG_KBIBTEX_NETWORKING) << "Failed to parse XML data from" << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString();
            stopSearch(resultUnspecifiedError);
        }
    }

    refreshBusyProperty();
}
