Rewrite for maintainability (1.10)

This commit is contained in:
cevoj35 2023-01-06 22:29:57 +00:00
parent a0f662935b
commit 4aa0ff0339
7 changed files with 578 additions and 70 deletions

288
FmhyChecker.pyw Normal file
View file

@ -0,0 +1,288 @@
import requests
from fake_headers import Headers
import os
import re
from threading import Thread, Event
from contextlib import suppress
from pyperclip import copy
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import pyqtSignal
from PyQt5 import uic
from PyQt5.QtWidgets import QMainWindow
from requests.exceptions import ReadTimeout, ConnectionError
import time
import csv
# fake_headers generator; handle_req calls headers.generate() for every request.
headers = Headers(headers=True)

# Wiki scrape, done once at import time.  The pattern has a single capture
# group (everything after the scheme and an optional "www."/"ww<n>." prefix),
# so findall yields those normalised remainders; they are kept in a set for
# fast membership checks against pasted links.
elapsed = time.perf_counter()
url_regex = re.compile(r'(?:https?|ftp|file):\/\/(?:ww(?:w|\d+)\.)?((?:[\w_-]+(?:\.[\w_-]+)+)[\w.,@?^=%&:\/~+#-]*[\w@?^=%&~+-])')
# requests never times out unless told to; without a timeout a stalled
# connection would hang start-up indefinitely.  10 s matches handle_req.
wiki = set(url_regex.findall(
    requests.get("https://raw.githubusercontent.com/nbats/FMHYedit/main/single-page", timeout=10).text
))
print(f'Wiki scraped in {time.perf_counter() - elapsed:0.4f} sec. Found {len(wiki)} links.')
def resource_path(relative_path):
    """Return the path of a resource shipped alongside the application.

    The base directory is ``sys._MEIPASS`` when that attribute exists (it is
    set by PyInstaller in frozen builds), otherwise the directory containing
    this source file.

    Args:
        relative_path: Path of the resource relative to the base directory.

    Returns:
        ``relative_path`` joined onto the base directory.
    """
    # Imported locally: at module level ``sys`` is only imported inside the
    # ``__main__`` guard, so it is not guaranteed to be a global here.
    import sys

    try:
        base_path = sys._MEIPASS
    except AttributeError:
        # Not running from a bundle; resolve relative to this file instead.
        base_path = os.path.dirname(__file__)
    return os.path.join(base_path, relative_path)
def handle_req(url, item, callback):
    """Fetch ``url`` and report the outcome through ``callback``.

    ``callback`` is always invoked exactly once as
    ``callback(url, result, item)`` where ``result`` is the ``requests``
    response on success, or a short error string on failure
    (``'Timeout'``, ``'Error'`` or the exception text).

    Args:
        url: Address to request.
        item: Opaque object handed back to ``callback`` unchanged (the caller
            passes the tree item that displays this URL).
        callback: Callable receiving ``(url, result, item)``.
    """
    try:
        resp = requests.get(url, headers=headers.generate(), timeout=10)
    except requests.exceptions.Timeout:
        # Base class of both ReadTimeout and ConnectTimeout.  ConnectTimeout
        # also derives from ConnectionError, so this clause must come first
        # for connect timeouts to be labelled correctly.
        callback(url, 'Timeout', item)
    except ConnectionError:
        # requests.exceptions.ConnectionError (imported at file level).
        callback(url, 'Error', item)
    except Exception as e:
        # Worker boundary: surface anything else (invalid URL, SSL, ...) as text.
        callback(url, str(e), item)
    else:
        callback(url, resp, item)
def async_request(*args):
    """Start ``handle_req(*args)`` on a new daemon thread and return at once."""
    Thread(target=handle_req, args=args, daemon=True).start()
class UI(QMainWindow):
    """Main window: flags pasted links that already exist in the wiki and
    optionally tests them over HTTP.

    The widgets used as attributes (``inputBox``, ``outputTree``, the copy /
    test / export buttons, ``label``, ...) are created by ``uic.loadUi`` from
    ``MainWindow.ui``.  Tree columns are 0 = check mark, 1 = link, 2 = status.
    """

    # Same pattern as the module-level ``url_regex`` but with two groups:
    # (scheme + optional "www."/"ww<n>." prefix, remainder).  The remainder is
    # what is looked up in the ``wiki`` set; both joined give the full link.
    group_url_regex = re.compile(r'((?:https?|ftp|file):\/\/(?:ww(?:w|\d+)\.)?)((?:[\w_-]+(?:\.[\w_-]+)+)[\w.,@?^=%&:\/~+#-]*[\w@?^=%&~+-])')
    # Emitted by the waiter thread (_waitForEvent) to request another checkLinks pass.
    call_back_checkLinks = pyqtSignal()
    # (url, response or error text, tree item); emitted by request worker threads.
    http_test_sig = pyqtSignal(str, object, object)

    def __init__(self):
        """Load the UI definition, connect signals and initialise state."""
        super().__init__()
        uic.loadUi(resource_path('MainWindow.ui'), self)
        # os.path.join keeps the asset path valid on non-Windows platforms too.
        self.setWindowIcon(QtGui.QIcon(resource_path(os.path.join('assets', 'icon.ico'))))
        self.checkSelected.setVisible(False)
        self.outputTree.header().setSectionsMovable(False)
        self.outputTree.setColumnWidth(0, 50)
        self.outputTree.setColumnWidth(1, 180)
        self.outputTree.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection)

        # status code font
        self.status_font = QtGui.QFont()
        self.status_font.setFamily("Calibri")
        self.status_font.setPointSize(10)

        # status code colors, keyed by the range of codes they apply to
        self.status_colors = {
            range(200, 300): '#31cd64',
            range(300, 400): '#33a7ff',
            range(400, 500): '#fda92a',
            range(500, 600): '#fc4f52',
        }

        # Link lists rebuilt by every checkLinks pass; created up front so the
        # copy handlers and getTestedLinks never hit a missing attribute.
        self.valid_links, self.duped_links, self.tested_links = [], [], []
        self.testing_items = set()  # URLs with a request in flight
        self.tested_items = {}      # URL -> requests response, or error text

        # State coordinating overlapping checkLinks passes.
        self._is_free = Event()     # set while no checkLinks pass is running
        self._is_free.set()
        self.line_thread = None     # waiter thread, if one was started
        self._new_event = False     # asks the running pass to abort

        # connections
        self.copyDupes.clicked.connect(lambda: copy('\n'.join(self.duped_links)))
        self.copyValid.clicked.connect(lambda: copy('\n'.join(self.valid_links)))
        self.copyTested.clicked.connect(lambda: copy('\n'.join(self.getTestedLinks())))
        self.exportCsv.clicked.connect(self.exportCsvDialog)
        self.checkSelected.clicked.connect(self._testSelectedLinks)
        self.inputBox.textChanged.connect(self.checkLinks)
        self.outputTree.itemSelectionChanged.connect(self.onSelection)
        self.http_test_sig.connect(self.finishTest)
        self.call_back_checkLinks.connect(self.checkLinks)

        self.retranslateUi()
        self.show()

    def exportCsvDialog(self):
        """Ask for a destination file and export every input link as CSV.

        Columns: request URL, whether the link is absent from the wiki, number
        of redirects, the status-code chain (a spreadsheet formula made of
        hyperlinks) and the reason / error text.  Links that were never tested
        get empty test columns.  Write failures are reported in a message box.
        """
        file_dialog = QtWidgets.QFileDialog()
        file_dialog.setAcceptMode(QtWidgets.QFileDialog.AcceptSave)
        file_dialog.setNameFilter("CSV (*.csv)")
        file_dialog.setDefaultSuffix("csv")
        # show dialog
        if not file_dialog.exec_():
            return  # cancelled
        file_path = file_dialog.selectedFiles()[0]
        links = self.group_url_regex.findall(self.inputBox.toPlainText())
        try:
            # Explicit encoding: the pattern's \w matches non-ASCII characters,
            # which the locale default encoding may be unable to write.  The
            # BOM written by utf-8-sig lets Excel detect UTF-8.
            with open(file_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
                writer = csv.writer(csvfile, dialect='excel', quoting=csv.QUOTE_MINIMAL)
                writer.writerow(['Request URL', 'Unique?', '# Redirects', 'Status', 'Reason'])
                writer.writerows(self._csvRow(link) for link in links)
        except PermissionError:
            QtWidgets.QMessageBox.critical(self.centralwidget, "Error", "File permission denied.")
        except OSError as err:
            QtWidgets.QMessageBox.critical(self.centralwidget, "Error", f"Could not write file: {err}")

    def _csvRow(self, link):
        """Build one CSV row for a ``group_url_regex`` match ``(prefix, remainder)``."""
        full_link = ''.join(link)
        # Compared against None explicitly: a response object's truthiness
        # reflects its status, not its presence.
        result = self.tested_items.get(full_link)
        redirects = status_code = reason = ''
        if isinstance(result, str):
            reason = result  # request failed; only the error text is known
        elif result is not None:
            redirects = str(len(result.history))
            reason = result.reason
            status_code = '=CONCAT(' + ', " > ", '.join(
                f'HYPERLINK("{r.url}", "{r.status_code}")'
                for r in (*result.history, result)
            ) + ')'
        return [
            full_link,
            'FALSE' if link[1] in wiki else 'TRUE',
            redirects,
            status_code,
            reason,
        ]

    def getTestedLinks(self):
        """Return the unique (non-wiki) links whose test ended with a 2xx status."""
        valid = set(self.valid_links)  # one set instead of a list scan per link
        return [
            link for link, result in self.tested_items.items()
            if link in valid
            and not isinstance(result, str)
            and result.status_code in range(200, 300)
        ]

    def finishTest(self, url, resp, item):
        """Record a finished test and render its result in the item's status column.

        Args:
            url: The tested link.
            resp: ``requests`` response, or an error string if the request failed.
            item: Tree item showing ``url``; it may already have been deleted.
        """
        self.testing_items.discard(url)
        self.tested_items[url] = resp
        self.copyTested.setEnabled(True)
        try:
            item.text(2)
        except RuntimeError:
            return  # item was deleted

        # One small coloured label per response, laid out left to right.
        layout = QtWidgets.QHBoxLayout()
        widget = QtWidgets.QWidget()
        widget.setLayout(layout)
        layout.setContentsMargins(0, 0, 0, 0)
        layout.setSpacing(2)
        layout.setAlignment(QtCore.Qt.AlignLeft)
        self.outputTree.setItemWidget(item, 2, widget)
        item.setText(2, "")

        if isinstance(resp, str):
            label = QtWidgets.QLabel(f' {resp} ')
            label.setFont(self.status_font)
            label.setStyleSheet('background-color: #A12729; color: white; border-radius: 6px;')
            layout.addWidget(label)
            return

        # Redirect hops first, final response last; the tooltip holds each URL.
        for r in (*resp.history, resp):
            label = QtWidgets.QLabel(f" {r.status_code} ")
            color = next(
                (shade for codes, shade in self.status_colors.items() if r.status_code in codes),
                '#000000',
            )
            label.setStyleSheet(f'background-color: {color}; color: white; border-radius: 6px;')
            label.setToolTip(r.url)
            label.setToolTipDuration(-1)
            label.setFont(self.status_font)
            layout.addWidget(label)

    def _testSelectedLinks(self):
        """Start an HTTP test for every selected link that has not been tested yet."""
        selected = self.getRanItems()
        self.testing_items.update(i.text(1) for i in selected)  # remember tested items
        for item in selected:
            item.setText(2, "Testing...")
        self.outputTree.clearSelection()
        self.checkSelected.setVisible(False)
        for item in selected:
            async_request(item.text(1), item, self.http_test_sig.emit)

    def getRanItems(self):
        """Return the selected tree items whose link is neither tested nor in flight."""
        known = {*self.tested_items, *self.testing_items}  # built once, not per item
        return [i for i in self.outputTree.selectedItems() if i.text(1) not in known]

    def onSelection(self):
        """Show the test button, with a count, only when something testable is selected."""
        if selected := self.getRanItems():
            self.checkSelected.setText(QtCore.QCoreApplication.translate("MainWindow", f"Test ({len(selected)}) \U0001F50D"))
            self.checkSelected.setVisible(True)
        else:
            self.checkSelected.setVisible(False)

    def _waitForEvent(self):
        """Thread target: ask the running pass to abort, wait for it, then request a new pass."""
        self._new_event = True
        self._is_free.wait()
        self.call_back_checkLinks.emit()

    def checkLinks(self):
        """Rebuild the output tree and link lists from the text in the input box.

        If a pass is already running when this is called, no work is done
        here; a waiter thread is started (unless one is alive) that aborts the
        running pass and triggers a new one afterwards.
        """
        if not self._is_free.is_set():
            if not (self.line_thread and self.line_thread.is_alive()):
                self.line_thread = Thread(target=self._waitForEvent, daemon=True)
                self.line_thread.start()
            return

        self._is_free.clear()
        # An abort request still set at this point was aimed at a pass that has
        # already ended (e.g. raised after its last abort check).  Honouring it
        # would cancel this fresh pass at the first link and leave the tree empty.
        self._new_event = False
        try:
            text = self.inputBox.toPlainText()
            self.inputBox.setPlaceholderText('' if text else self._placeholderText)
            self.outputTree.clear()
            for button in (self.copyValid, self.copyDupes, self.copyTested, self.exportCsv):
                button.setEnabled(False)
            self.checkSelected.setVisible(False)

            links = self.group_url_regex.findall(text)
            self.valid_links, self.duped_links, self.tested_links = [], [], []
            chunked = len(links) > 100

            # populate tree
            for n, link in enumerate(links):
                if self._new_event:
                    # Newer input is waiting; drop this pass.
                    self._new_event = False
                    return
                item = QtWidgets.QTreeWidgetItem(self.outputTree)
                full_link = ''.join(link)
                item.setText(1, full_link)
                if chunked and not n % 10:
                    # process in chunks to allow for UI updates
                    QtWidgets.QApplication.processEvents()
                with suppress(RuntimeError):
                    if full_link in self.tested_items:
                        self.finishTest(full_link, self.tested_items[full_link], item)
                    elif full_link in self.testing_items:
                        item.setText(2, "Testing...")
                    if link[1] in wiki:
                        item.setText(0, "\u274C")
                        for column in range(3):
                            item.setBackground(column, QtGui.QColor(255, 128, 0))
                        self.duped_links.append(full_link)
                    else:
                        item.setText(0, "\u2705")
                        self.valid_links.append(full_link)

            # toggle buttons
            self.copyValid.setEnabled(bool(self.valid_links))
            self.copyDupes.setEnabled(bool(self.duped_links))
            self.copyTested.setEnabled(bool(self.getTestedLinks()))
            self.exportCsv.setEnabled(True)
        finally:
            # Always release, even on an unexpected error; otherwise every
            # later call would only ever queue behind a pass that never ends.
            self._is_free.set()

    def retranslateUi(self):
        """Set all user-visible texts (with translations)."""
        _translate = QtCore.QCoreApplication.translate
        self.setWindowTitle(_translate("MainWindow", "Dupe Checker v1.10"))
        self.label.setText(_translate("MainWindow", "FMHY Dupe Tester"))
        self.label_2.setText(_translate("MainWindow", "by cevoj35548"))
        # Kept so checkLinks can restore the placeholder when the box is emptied.
        self._placeholderText = _translate("MainWindow", "Paste a list of links here")
        self.inputBox.setPlaceholderText(self._placeholderText)
        self.copyValid.setText(_translate("MainWindow", "Copy \u2705"))
        self.copyDupes.setText(_translate("MainWindow", "Copy \u274C"))
        self.copyTested.setText(_translate("MainWindow", "Copy \U0001F50D"))
        self.checkSelected.setText(_translate("MainWindow", "Test \U0001F50D"))
        header = self.outputTree.headerItem()
        header.setText(0, _translate("MainWindow", "Check"))
        header.setText(1, _translate("MainWindow", "Link"))
        header.setText(2, _translate("MainWindow", "Status"))
if __name__ == "__main__":
    # Script entry point: create the application, register the fonts shipped
    # in assets/fonts, show the main window and run the Qt event loop.
    import sys

    app = QtWidgets.QApplication(sys.argv)
    # os.path.join instead of a hard-coded backslash so the directory is also
    # found on platforms whose path separator is "/".
    fonts_dir = resource_path(os.path.join('assets', 'fonts'))
    for f in os.listdir(fonts_dir):
        QtGui.QFontDatabase.addApplicationFont(os.path.join(fonts_dir, f))
    # UI.__init__ shows the window itself; the reference keeps it alive.
    window = UI()
    sys.exit(app.exec_())

254
MainWindow.ui Normal file
View file

@ -0,0 +1,254 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>750</width>
<height>450</height>
</rect>
</property>
<widget class="QWidget" name="centralwidget">
<layout class="QGridLayout" name="gridLayout">
<property name="leftMargin">
<number>9</number>
</property>
<property name="topMargin">
<number>3</number>
</property>
<property name="rightMargin">
<number>9</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item row="0" column="0" colspan="2">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QLabel" name="label">
<property name="font">
<font>
<family>Segoe UI Variable Display</family>
<pointsize>14</pointsize>
</font>
</property>
<property name="text">
<string>FMHY Dupe Tester</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QLabel" name="label_2">
<property name="font">
<font>
<family>Segoe UI Variable Display</family>
<pointsize>14</pointsize>
</font>
</property>
<property name="text">
<string>by cevoj35548</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="1" column="0">
<widget class="QPlainTextEdit" name="inputBox">
<property name="font">
<font>
<family>Calibri</family>
<pointsize>11</pointsize>
</font>
</property>
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="placeholderText">
<string>Paste a list of links here</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QFrame" name="gridFrame">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<property name="leftMargin">
<number>1</number>
</property>
<property name="topMargin">
<number>1</number>
</property>
<property name="rightMargin">
<number>1</number>
</property>
<property name="bottomMargin">
<number>4</number>
</property>
<property name="verticalSpacing">
<number>4</number>
</property>
<item row="1" column="0">
<layout class="QHBoxLayout" name="horizontalLayout_2">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item>
<widget class="QPushButton" name="copyDupes">
<property name="enabled">
<bool>false</bool>
</property>
<property name="font">
<font>
<family>Calibri</family>
<pointsize>11</pointsize>
</font>
</property>
<property name="text">
<string>Copy ❌</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="copyValid">
<property name="enabled">
<bool>false</bool>
</property>
<property name="font">
<font>
<family>Calibri</family>
<pointsize>11</pointsize>
</font>
</property>
<property name="text">
<string>Copy ✅</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="copyTested">
<property name="enabled">
<bool>false</bool>
</property>
<property name="font">
<font>
<family>Calibri</family>
<pointsize>11</pointsize>
</font>
</property>
<property name="text">
<string>Copy 🔍</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="checkSelected">
<property name="font">
<font>
<family>Calibri</family>
<pointsize>11</pointsize>
</font>
</property>
<property name="text">
<string>Test 🔍</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="exportCsv">
<property name="sizePolicy">
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>30</width>
<height>30</height>
</size>
</property>
<property name="maximumSize">
<size>
<width>30</width>
<height>30</height>
</size>
</property>
<property name="font">
<font>
<family>Segoe MDL2 Assets</family>
<pointsize>13</pointsize>
</font>
</property>
<property name="text">
<string></string>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</item>
<item row="0" column="0">
<widget class="QTreeWidget" name="outputTree">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="rootIsDecorated">
<bool>false</bool>
</property>
<column>
<property name="text">
<string>Check</string>
</property>
</column>
<column>
<property name="text">
<string>Link</string>
</property>
</column>
<column>
<property name="text">
<string>Status</string>
</property>
</column>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
</widget>
<resources/>
<connections/>
</ui>

106
README.md
View file

@ -1,92 +1,58 @@
# Fmhy Dupe Checker
# FMHY Dupe Checker
A simple, *fast* tool to compare links against the FMHY wiki, and display their redirect chains.
![screenshot](https://i.imgur.com/B0yZPq4.png)
<hr width=100>
## Getting started
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
### Clone the repo
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
## Add your files
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
```
cd existing_repo
git remote add origin https://gitlab.com/cevoj/fmhy-dupe-checker.git
git branch -M main
git push -uf origin main
```bash
git clone https://gitlab.com/cevoj/fmhy-dupe-checker.git
cd fmhy-dupe-checker
```
## Integrate with your tools
### Install requirements
- [ ] [Set up project integrations](https://gitlab.com/cevoj/fmhy-dupe-checker/-/settings/integrations)
[Python](https://www.python.org/downloads/) is required.
## Collaborate with your team
```bash
pip install requests PyQt5 pyperclip fake-headers
```
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
Run `FmhyChecker.pyw`
## Test and Deploy
Use the built-in continuous integration in GitLab.
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
***
# Editing this README
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
## Suggestions for a good README
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
## Name
Choose a self-explaining name for your project.
## Description
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
## Badges
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
## Visuals
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
## Installation
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
<hr width=100>
## Usage
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
## Support
Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
### Comparing and copying links
## Roadmap
If you have ideas for releases in the future, it is a good idea to list them in the README.
This tool takes the links entered in the field on the left and checks whether they are already present in the wiki. Links are extracted automatically using a regular expression.
## Contributing
State if you are open to contributions and what your requirements are for accepting them.
*Note that the regex is designed to ignore a trailing `/`, the `http`/`https` scheme, and a `www`/`ww<n>` prefix.*
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
*Dupes* will be indicated with a ❌, and *unique* links will have a ✅. Once the scan is complete, the `Copy ❌` and `Copy ✅` buttons will be enabled, allowing you to copy all links flagged as *dupe* or *unique*, separated by newlines (`\n`).
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
## Authors and acknowledgment
Show your appreciation to those who have contributed to the project.
### Checking URL validity
## License
For open source projects, say how it is licensed.
![video here](https://i.imgur.com/9BhHsaY.mp4)
## Project status
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
Selecting links and clicking `Test` will display each URL's redirect chain. View a URL by hovering over its status code.
The `Copy 🔍` button will copy all links that are *unique* (✅) **and** *tested* to have successful responses (read more about status codes [here](https://httpstatus.io/http-status-codes)).
### CSV Exports
The download button will export the data to a CSV file.
![csv](https://i.imgur.com/KzxzNIb.png)
*Note: Status codes are hyperlinked.*
---

BIN
assets/fonts/Calibri.ttf Normal file

Binary file not shown.

BIN
assets/fonts/SegUIVar.ttf Normal file

Binary file not shown.

Binary file not shown.

BIN
assets/icon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB