2023-10-14 13:18:27 +00:00
|
|
|
/*
|
2025-02-10 01:03:15 +00:00
|
|
|
* Copyright (C) 2025 KeePassXC Team <team@keepassxc.org>
|
2023-10-14 13:18:27 +00:00
|
|
|
*
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 2 or (at your option)
|
|
|
|
|
* version 3 of the License.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "UrlTools.h"
|
2024-06-24 11:43:59 +00:00
|
|
|
#if defined(KPXC_FEATURE_NETWORK) || defined(KPXC_FEATURE_BROWSER)
|
2023-10-14 13:18:27 +00:00
|
|
|
#include <QHostAddress>
|
|
|
|
|
#include <QNetworkCookie>
|
|
|
|
|
#include <QNetworkCookieJar>
|
|
|
|
|
#endif
|
|
|
|
|
#include <QRegularExpression>
|
|
|
|
|
#include <QUrl>
|
|
|
|
|
|
2025-02-10 01:03:15 +00:00
|
|
|
const QString UrlTools::URL_WILDCARD = "1kpxcwc1";
|
|
|
|
|
|
2023-10-14 13:18:27 +00:00
|
|
|
Q_GLOBAL_STATIC(UrlTools, s_urlTools)
|
|
|
|
|
|
|
|
|
|
UrlTools* UrlTools::instance()
|
|
|
|
|
{
|
|
|
|
|
return s_urlTools;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QUrl UrlTools::convertVariantToUrl(const QVariant& var) const
|
|
|
|
|
{
|
|
|
|
|
QUrl url;
|
|
|
|
|
if (var.canConvert<QUrl>()) {
|
|
|
|
|
url = var.toUrl();
|
|
|
|
|
}
|
|
|
|
|
return url;
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-24 11:43:59 +00:00
|
|
|
#if defined(KPXC_FEATURE_NETWORK) || defined(KPXC_FEATURE_BROWSER)
|
2023-10-14 13:18:27 +00:00
|
|
|
QUrl UrlTools::getRedirectTarget(QNetworkReply* reply) const
|
|
|
|
|
{
|
|
|
|
|
QVariant var = reply->attribute(QNetworkRequest::RedirectionTargetAttribute);
|
|
|
|
|
QUrl url = convertVariantToUrl(var);
|
|
|
|
|
return url;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Gets the base domain of URL or hostname.
|
|
|
|
|
*
|
|
|
|
|
* Returns the base domain, e.g. https://another.example.co.uk -> example.co.uk
|
|
|
|
|
* Up-to-date list can be found: https://publicsuffix.org/list/public_suffix_list.dat
|
|
|
|
|
*/
|
|
|
|
|
QString UrlTools::getBaseDomainFromUrl(const QString& url) const
|
|
|
|
|
{
|
|
|
|
|
auto qUrl = QUrl::fromUserInput(url);
|
|
|
|
|
|
|
|
|
|
auto host = qUrl.host();
|
|
|
|
|
if (isIpAddress(host)) {
|
|
|
|
|
return host;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const auto tld = getTopLevelDomainFromUrl(qUrl.toString());
|
|
|
|
|
if (tld.isEmpty() || tld.length() + 1 >= host.length()) {
|
|
|
|
|
return host;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Remove the top level domain part from the hostname, e.g. https://another.example.co.uk -> https://another.example
|
|
|
|
|
host.chop(tld.length() + 1);
|
|
|
|
|
// Split the URL and select the last part, e.g. https://another.example -> example
|
|
|
|
|
QString baseDomain = host.split('.').last();
|
|
|
|
|
// Append the top level domain back to the URL, e.g. example -> example.co.uk
|
|
|
|
|
baseDomain.append(QString(".%1").arg(tld));
|
|
|
|
|
|
|
|
|
|
return baseDomain;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Gets the top level domain from URL.
|
|
|
|
|
*
|
|
|
|
|
* Returns the TLD e.g. https://another.example.co.uk -> co.uk
|
|
|
|
|
*/
|
|
|
|
|
QString UrlTools::getTopLevelDomainFromUrl(const QString& url) const
|
|
|
|
|
{
|
|
|
|
|
auto host = QUrl::fromUserInput(url).host();
|
|
|
|
|
if (isIpAddress(host)) {
|
|
|
|
|
return host;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const auto numberOfDomainParts = host.split('.').length();
|
|
|
|
|
static const auto dummy = QByteArrayLiteral("");
|
|
|
|
|
|
|
|
|
|
// Only loop the amount of different parts found
|
|
|
|
|
for (auto i = 0; i < numberOfDomainParts; ++i) {
|
|
|
|
|
// Cut the first part from host
|
|
|
|
|
host = host.mid(host.indexOf('.') + 1);
|
|
|
|
|
|
|
|
|
|
QNetworkCookie cookie(dummy, dummy);
|
|
|
|
|
cookie.setDomain(host);
|
|
|
|
|
|
|
|
|
|
// Check if dummy cookie's domain/TLD matches with public suffix list
|
2024-03-06 12:42:01 +00:00
|
|
|
if (!QNetworkCookieJar{}.setCookiesFromUrl(QList{cookie}, QUrl::fromUserInput(url))) {
|
2023-10-14 13:18:27 +00:00
|
|
|
return host;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return host;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool UrlTools::isIpAddress(const QString& host) const
|
|
|
|
|
{
|
2024-03-06 12:42:01 +00:00
|
|
|
// Handle IPv6 host with brackets, e.g [::1]
|
|
|
|
|
const auto hostAddress = host.startsWith('[') && host.endsWith(']') ? host.mid(1, host.length() - 2) : host;
|
|
|
|
|
QHostAddress address(hostAddress);
|
2023-10-14 13:18:27 +00:00
|
|
|
return address.protocol() == QAbstractSocket::IPv4Protocol || address.protocol() == QAbstractSocket::IPv6Protocol;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Returns true if URLs are identical. Paths with "/" are removed during comparison.
|
|
|
|
|
// URLs without scheme reverts to https.
|
|
|
|
|
// Special handling is needed because QUrl::matches() with QUrl::StripTrailingSlash does not strip "/" paths.
|
|
|
|
|
bool UrlTools::isUrlIdentical(const QString& first, const QString& second) const
|
|
|
|
|
{
|
|
|
|
|
auto trimUrl = [](QString url) {
|
|
|
|
|
url = url.trimmed();
|
|
|
|
|
if (url.endsWith("/")) {
|
|
|
|
|
url.remove(url.length() - 1, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return url;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (first.isEmpty() || second.isEmpty()) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-10 01:03:15 +00:00
|
|
|
// Replace URL wildcards for comparison if found
|
|
|
|
|
const auto firstUrl = trimUrl(QString(first).replace("*", UrlTools::URL_WILDCARD));
|
|
|
|
|
const auto secondUrl = trimUrl(QString(second).replace("*", UrlTools::URL_WILDCARD));
|
2023-10-14 13:18:27 +00:00
|
|
|
if (firstUrl == secondUrl) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return QUrl(firstUrl).matches(QUrl(secondUrl), QUrl::StripTrailingSlash);
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-10 01:03:15 +00:00
|
|
|
bool UrlTools::isUrlValid(const QString& urlField, bool looseComparison) const
|
2023-10-14 13:18:27 +00:00
|
|
|
{
|
|
|
|
|
if (urlField.isEmpty() || urlField.startsWith("cmd://", Qt::CaseInsensitive)
|
|
|
|
|
|| urlField.startsWith("kdbx://", Qt::CaseInsensitive) || urlField.startsWith("{REF:A", Qt::CaseInsensitive)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-10 01:03:15 +00:00
|
|
|
auto url = urlField;
|
|
|
|
|
|
|
|
|
|
// Loose comparison that allows wildcards and exact URL inside " characters
|
|
|
|
|
if (looseComparison) {
|
|
|
|
|
// Exact URL
|
|
|
|
|
if (url.startsWith("\"") && url.endsWith("\"")) {
|
|
|
|
|
// Do not allow exact URL with wildcards, or empty exact URL
|
|
|
|
|
if (url.contains("*") || url.length() == 2) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get the URL inside ""
|
|
|
|
|
url.remove(0, 1);
|
|
|
|
|
url.remove(url.length() - 1, 1);
|
|
|
|
|
} else {
|
|
|
|
|
// Do not allow URL with just wildcards, or double wildcards, or no separator (.)
|
|
|
|
|
if (url.length() == url.count("*") || url.contains("**") || url.contains("*.*") || !url.contains(".")) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
url.replace("*", UrlTools::URL_WILDCARD);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QUrl qUrl;
|
2023-10-14 13:18:27 +00:00
|
|
|
if (urlField.contains("://")) {
|
2025-02-10 01:03:15 +00:00
|
|
|
qUrl = url;
|
2023-10-14 13:18:27 +00:00
|
|
|
} else {
|
2025-02-10 01:03:15 +00:00
|
|
|
qUrl = QUrl::fromUserInput(url);
|
2023-10-14 13:18:27 +00:00
|
|
|
}
|
|
|
|
|
|
2025-02-10 01:03:15 +00:00
|
|
|
if (qUrl.scheme() != "file" && qUrl.host().isEmpty()) {
|
2023-10-14 13:18:27 +00:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-10 01:03:15 +00:00
|
|
|
#if defined(WITH_XC_NETWORKING) || defined(WITH_XC_BROWSER)
|
|
|
|
|
// Prevent TLD wildcards
|
|
|
|
|
if (looseComparison && url.contains(UrlTools::URL_WILDCARD)) {
|
|
|
|
|
const auto tld = getTopLevelDomainFromUrl(url);
|
2025-02-12 16:12:28 +00:00
|
|
|
if (tld.contains(UrlTools::URL_WILDCARD) || qUrl.host() == QString("%1.%2").arg(UrlTools::URL_WILDCARD, tld)) {
|
2025-02-10 01:03:15 +00:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2023-10-14 13:18:27 +00:00
|
|
|
// Check for illegal characters. Adds also the wildcard * to the list
|
|
|
|
|
QRegularExpression re("[<>\\^`{|}\\*]");
|
2025-02-10 01:03:15 +00:00
|
|
|
auto match = re.match(url);
|
2023-10-14 13:18:27 +00:00
|
|
|
if (match.hasMatch()) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2024-03-06 12:42:01 +00:00
|
|
|
|
|
|
|
|
bool UrlTools::domainHasIllegalCharacters(const QString& domain) const
|
|
|
|
|
{
|
|
|
|
|
QRegularExpression re(R"([\s\^#|/:<>\?@\[\]\\])");
|
|
|
|
|
return re.match(domain).hasMatch();
|
|
|
|
|
}
|