Fix Path handling (#72)

* Fix Path handling
* Port to pathlib
* Replace with re.sub
* Tidied regular expression
* Ensure escaping is done on a single path level at a time
* Version bump to 2.3.0
* pylint & black fixes
This commit is contained in:
Peter Taylor 2023-03-16 08:27:43 +00:00 committed by GitHub
parent bf98823ede
commit b64e8197b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 55 additions and 48 deletions

View file

@ -1,3 +1,5 @@
# pylint: disable=consider-using-from-import
import itchiodl.utils as utils
from .login import LoginWeb, LoginAPI
from .bundle import Bundle
from .library import Library

View file

@ -1,13 +1,11 @@
import re
import json
import os
import urllib
import datetime
import shutil
from pathlib import Path
import requests
import itchiodl.utils
from itchiodl import utils
class Game:
@ -31,6 +29,11 @@ class Game:
self.files = []
self.downloads = []
self.dir = (
Path(".")
/ utils.clean_path(self.publisher_slug)
/ utils.clean_path(self.game_slug)
)
def load_downloads(self, token):
"""Load all downloads for this game"""
@ -53,17 +56,13 @@ class Game:
"""Download a singular file"""
print("Downloading", self.name)
# if os.path.exists(f"{self.publisher_slug}/{self.game_slug}.json"):
# if out_folder.with_suffix(".json").exists():
# print(f"Skipping Game {self.name}")
# return
self.load_downloads(token)
if not os.path.exists(self.publisher_slug):
os.mkdir(self.publisher_slug)
if not os.path.exists(f"{self.publisher_slug}/{self.game_slug}"):
os.mkdir(f"{self.publisher_slug}/{self.game_slug}")
self.dir.mkdir(parents=True, exist_ok=True)
for d in self.downloads:
if (
@ -75,7 +74,7 @@ class Game:
continue
self.do_download(d, token)
with open(f"{self.publisher_slug}/{self.game_slug}.json", "w") as f:
with self.dir.with_suffix(".json").open("w") as f:
json.dump(
{
"name": self.name,
@ -93,42 +92,41 @@ class Game:
"""Download a single file, checking for existing files"""
print(f"Downloading {d['filename']}")
file = itchiodl.utils.clean_path(d["filename"] or d["display_name"] or d["id"])
path = itchiodl.utils.clean_path(f"{self.publisher_slug}/{self.game_slug}")
filename = d["filename"] or d["display_name"] or d["id"]
if os.path.exists(f"{path}/{file}"):
print(f"File Already Exists! {file}")
if os.path.exists(f"{path}/{file}.md5"):
out_file = self.dir / filename
with open(f"{path}/{file}.md5", "r") as f:
if out_file.exists():
print(f"File Already Exists! {filename}")
md5_file = out_file.with_suffix(".md5")
if md5_file.exists():
with md5_file.open("r") as f:
md5 = f.read().strip()
if md5 == d["md5_hash"]:
print(f"Skipping {self.name} - {file}")
print(f"Skipping {self.name} - {filename}")
return
print(f"MD5 Mismatch! {file}")
print(f"MD5 Mismatch! {filename}")
else:
md5 = itchiodl.utils.md5sum(f"{path}/{file}")
md5 = utils.md5sum(str(out_file))
if md5 == d["md5_hash"]:
print(f"Skipping {self.name} - {file}")
print(f"Skipping {self.name} - {filename}")
# Create checksum file
with open(f"{path}/{file}.md5", "w") as f:
with md5_file.open("w") as f:
f.write(d["md5_hash"])
return
# Old Download or corrupted file?
corrupted = False
if corrupted:
os.remove(f"{path}/{file}")
out_file.remove()
return
if not os.path.exists(f"{path}/old"):
os.mkdir(f"{path}/old")
old_dir = self.dir / "old"
old_dir.mkdir(exist_ok=True)
print(f"Moving {file} to old/")
print(f"Moving {filename} to old/")
timestamp = datetime.datetime.now().strftime("%Y-%m-%d")
print(timestamp)
shutil.move(f"{path}/{file}", f"{path}/old/{timestamp}-{file}")
out_file.rename(old_dir / f"{timestamp}-{filename}")
# Get UUID
r = requests.post(
@ -150,16 +148,16 @@ class Game:
)
# response_code = urllib.request.urlopen(url).getcode()
try:
itchiodl.utils.download(url, path, self.name, file)
except itchiodl.utils.NoDownloadError:
utils.download(url, self.dir, self.name, filename)
except utils.NoDownloadError:
print("Http response is not a download, skipping")
with open("errors.txt", "a") as f:
f.write(
f""" Cannot download game/asset: {self.game_slug}
Publisher Name: {self.publisher_slug}
Path: {path}
File: {file}
Path: {out_file}
File: {filename}
Request URL: {url}
This request failed due to a missing response header
This game/asset has been skipped please download manually
@ -174,8 +172,8 @@ class Game:
f.write(
f""" Cannot download game/asset: {self.game_slug}
Publisher Name: {self.publisher_slug}
Path: {path}
File: {file}
Path: {out_file}
File: {filename}
Request URL: {url}
Request Response Code: {e.code}
Error Reason: {e.reason}
@ -186,10 +184,10 @@ class Game:
return
# Verify
if itchiodl.utils.md5sum(f"{path}/{file}") != d["md5_hash"]:
print(f"Failed to verify {file}")
if utils.md5sum(out_file) != d["md5_hash"]:
print(f"Failed to verify {filename}")
return
# Create checksum file
with open(f"{path}/{file}.md5", "w") as f:
with out_file.with_suffix(".md5").open("w") as f:
f.write(d["md5_hash"])

View file

@ -6,6 +6,7 @@ import requests
from bs4 import BeautifulSoup
from itchiodl.game import Game
from itchiodl.utils import NoDownloadError
class Library:
@ -89,15 +90,21 @@ class Library:
def download_library(self, platform=None):
    """Download all games in the library.

    Calls ``download(self.login, platform)`` on every entry of
    ``self.games`` from a thread pool with ``self.jobs`` workers, printing
    a progress line after each successful download and a summary at the end.

    A ``NoDownloadError`` raised by a game is printed and counted as an
    error instead of aborting the run; any other exception propagates.
    """
    with ThreadPoolExecutor(max_workers=self.jobs) as executor:
        counts = [0, 0]  # [successful downloads, failed downloads]
        total = len(self.games)
        lock = threading.RLock()

        def dl(counts, g):
            try:
                g.download(self.login, platform)
                with lock:
                    counts[0] += 1
                    print(f"Downloaded {g.name} ({counts[0]} of {total})")
            except NoDownloadError as e:
                # Workers run concurrently, so the error counter needs the
                # same lock as the success counter.
                with lock:
                    print(e)
                    counts[1] += 1

        results = executor.map(functools.partial(dl, counts), self.games)
        # Consume the iterator so every task finishes and any unexpected
        # exception from a worker is re-raised here rather than dropped.
        for _ in results:
            pass
        print(f"Downloaded {counts[0]} Games, {counts[1]} Errors")

View file

@ -40,7 +40,7 @@ def download(url, path, name, file):
def clean_path(path):
    """Replace characters that are not allowed in Windows file names.

    On Windows-like platforms (``win32``, ``cygwin``, ``msys``) each of the
    characters ``<``, ``>``, ``:``, ``|``, ``?``, ``*``, double quote,
    forward slash and backslash in ``path`` is replaced with ``-``. Both
    slash styles are replaced, so pass a single path component rather than
    a full path. On every other platform ``path`` is returned unchanged.
    """
    if sys.platform in ["win32", "cygwin", "msys"]:
        # re.sub performs the substitution; the re module has no "replace"
        # function, so that spelling would raise AttributeError.
        return re.sub(r"[<>:|?*\"\/\\]", "-", path)
    return path
@ -48,7 +48,7 @@ def clean_path(path):
def md5sum(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    ``path`` may be a string or a ``pathlib.Path``: the built-in ``open``
    accepts both, whereas ``path.open`` would fail for a plain string.
    The file is read in 4096-byte chunks rather than all at once.
    """
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the empty bytes read at EOF.
        for chunk in iter(lambda: f.read(4096), b""):
            md5.update(chunk)
    return md5.hexdigest()

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "itchiodl"
version = "2.2.0"
version = "2.3.0"
description = "Python Scripts for downloading / archiving your itchio library"
authors = ["Peter Taylor <me@et1.uk>"]
license = "MIT"