From 1d5dcb90c0b034d492567a0aa28f08a0bab0b971 Mon Sep 17 00:00:00 2001 From: Ajurna Date: Fri, 9 Apr 2021 11:29:03 +0100 Subject: [PATCH] added a lock on a critical path when regenerating pages. this should allow all images to load correctly --- .gitignore | 3 +- cbreader/settings/base.py | 1 - comic/errors.py | 2 + comic/models.py | 159 +++++++++++++------------- comic/templates/comic/read_comic.html | 10 +- comic/tests/test_models.py | 13 ++- comic/urls.py | 34 +++--- comic/views.py | 7 +- poetry.lock | 17 +-- pyproject.toml | 1 - 10 files changed, 125 insertions(+), 122 deletions(-) create mode 100644 comic/errors.py diff --git a/.gitignore b/.gitignore index 8d9456d..c5979eb 100644 --- a/.gitignore +++ b/.gitignore @@ -88,4 +88,5 @@ __pycache__/ local_settings.py .env -db.sqlite3 \ No newline at end of file +db.sqlite3 +identifier.sqlite \ No newline at end of file diff --git a/cbreader/settings/base.py b/cbreader/settings/base.py index 20cb683..245e3c1 100644 --- a/cbreader/settings/base.py +++ b/cbreader/settings/base.py @@ -38,7 +38,6 @@ INSTALLED_APPS = ( 'bootstrap4', "comic", "comic_auth", - 'db_mutex', ) MIDDLEWARE = [ diff --git a/comic/errors.py b/comic/errors.py new file mode 100644 index 0000000..580f161 --- /dev/null +++ b/comic/errors.py @@ -0,0 +1,2 @@ +class NotCompatibleArchive(Exception): + pass \ No newline at end of file diff --git a/comic/models.py b/comic/models.py index 8703278..3dcff90 100644 --- a/comic/models.py +++ b/comic/models.py @@ -1,10 +1,12 @@ +import mimetypes import uuid import zipfile from dataclasses import dataclass from functools import reduce +from itertools import zip_longest from os import listdir from pathlib import Path -from typing import Optional, List +from typing import Optional, List, Union, Tuple from django.conf import settings from django.contrib.auth.models import User @@ -13,11 +15,11 @@ from django.db.transaction import atomic from django.utils.http import urlsafe_base64_encode import PyPDF4 import PyPDF4.utils -from db_mutex import DBMutexError, DBMutexTimeoutError -from db_mutex.db_mutex import db_mutex import rarfile +from comic.errors import NotCompatibleArchive + if settings.UNRAR_TOOL: rarfile.UNRAR_TOOL = settings.UNRAR_TOOL @@ -72,15 +74,6 @@ class Directory(models.Model): self.parent.get_path_objects(p) return p - # @staticmethod - # def get_dir_from_path(file_path): - # file_path = file_path.split(os_path.sep) - # print(file_path) - # for d in Directory.objects.filter(name=file_path[-1]): - # print(d) - # if d.get_path_items() == file_path: - # return d - class ComicBook(models.Model): file_name = models.TextField() @@ -112,25 +105,9 @@ class ComicBook(models.Model): archive = zipfile.ZipFile(archive_path) except zipfile.BadZipfile: return False - try: - page_obj = ComicPage.objects.get(Comic=self, index=page) - except ComicPage.MultipleObjectsReturned: - with db_mutex('comicpage'): - ComicPage.objects.filter(Comic=self).delete() - self.process_comic_pages(archive, self) - page_obj = ComicPage.objects.get(Comic=self, index=page) - except ComicPage.DoesNotExist: - with db_mutex('comicpage'): - ComicPage.objects.filter(Comic=self).delete() - self.process_comic_pages(archive, self) - page_obj = ComicPage.objects.get(Comic=self, index=page) - try: - out = (archive.open(page_obj.page_file_name), page_obj.content_type) - except rarfile.NoRarEntry: - with db_mutex('comicpage'): - ComicPage.objects.filter(Comic=self).delete() - self.process_comic_pages(archive, self) - out = self.get_image(page) + + page_obj = ComicPage.objects.get(Comic=self, index=page) + out = (archive.open(page_obj.page_file_name), page_obj.content_type) return out def is_last_page(self, page): @@ -251,7 +228,7 @@ class ComicBook(models.Model): return ComicPage.objects.get(Comic=self, index=index).page_file_name @staticmethod - def process_comic_book(comic_file_name, directory=False): + def process_comic_book(comic_file_name: Path, directory: "Directory" = False) -> Union["ComicBook", Path]: """ :type comic_file_name: str @@ -265,42 +242,19 @@ class ComicBook(models.Model): return book except ComicBook.DoesNotExist: pass - base_dir = settings.COMIC_BOOK_VOLUME - if directory: - comic_full_path = Path(base_dir, directory.get_path(), comic_file_name) - else: - comic_full_path = Path(base_dir, comic_file_name) + book = ComicBook(file_name=comic_file_name, directory=directory if directory else None) + book.save() try: - cbx = rarfile.RarFile(comic_full_path) - except rarfile.NotRarFile: - cbx = None - if not cbx: - try: - cbx = zipfile.ZipFile(comic_full_path) - except zipfile.BadZipFile: - cbx = None - pdf_file = None - if not cbx: - try: - pdf_file = PyPDF4.PdfFileReader(str(comic_full_path)) - except PyPDF4.utils.PyPdfError: - pass - if not pdf_file and not cbx: + archive, archive_type = book.get_archive() + except NotCompatibleArchive: return comic_file_name - - if directory: - book = ComicBook(file_name=comic_file_name, directory=directory) - else: - book = ComicBook(file_name=comic_file_name) - book.save() - page_index = 0 - if cbx: - ComicBook.process_comic_pages(cbx, book) - elif pdf_file: + if archive_type == 'archive': + ComicBook.process_comic_pages(archive, book) + elif archive_type == 'pdf': with atomic(): - for page_index in range(pdf_file.getNumPages()): + for page_index in range(archive.getNumPages()): page = ComicPage( Comic=book, index=page_index, page_file_name=page_index+1, content_type='application/pdf' ) @@ -312,23 +266,11 @@ class ComicBook(models.Model): with atomic(): page_index = 0 for page_file_name in sorted([str(x) for x in cbx.namelist()], key=str.lower): - try: - dot_index = page_file_name.rindex(".") + 1 - except ValueError: - continue - ext = page_file_name.lower()[dot_index:] - if ext in ["jpg", "jpeg"]: - content_type = "image/jpeg" - elif ext == "png": - content_type = "image/png" - elif ext == "bmp": - content_type = "image/bmp" - elif ext == "gif": - content_type = "image/gif" - else: - content_type = "text/plain" page = ComicPage( - Comic=book, index=page_index, page_file_name=page_file_name, content_type=content_type + Comic=book, + index=page_index, + page_file_name=page_file_name, + content_type=mimetypes.guess_type(page_file_name)[0] ) page.save() page_index += 1 @@ -344,6 +286,65 @@ class ComicBook(models.Model): files.append(item) return sorted(directories) + sorted(files) + @property + def get_archive_path(self): + if self.directory: + return Path(settings.COMIC_BOOK_VOLUME, self.directory.get_path(), self.file_name) + else: + return Path(settings.COMIC_BOOK_VOLUME, self.file_name) + + def get_archive(self) -> Tuple[Union[rarfile.RarFile, zipfile.ZipFile, PyPDF4.PdfFileReader], str]: + archive_path = self.get_archive_path + try: + return rarfile.RarFile(archive_path), 'archive' + except rarfile.NotRarFile: + pass + try: + return zipfile.ZipFile(archive_path), 'archive' + except zipfile.BadZipFile: + pass + + try: + return PyPDF4.PdfFileReader(str(archive_path)), 'pdf' + except PyPDF4.utils.PyPdfError: + pass + raise NotCompatibleArchive + + def verify_pages(self, pages: Optional["ComicPage"] = None): + if not pages: + pages = ComicPage.objects.filter(Comic=self) + + archive, archive_type = self.get_archive() + if archive_type == 'pdf': + return + archive_files = [(x, mimetypes.guess_type(x)[0]) for x in sorted(archive.namelist()) if not x.endswith('/')] + index = 0 + print(archive_files) + for a_file, db_file in zip_longest(archive_files, pages): + print(a_file, db_file.page_file_name, db_file.content_type) + if not a_file: + db_file.delete() + continue + if not db_file: + ComicPage( + Comic=self, + page_file_name=a_file[0], + index=index, + content_type=a_file[1] + ).save() + index += 1 + continue + changed = False + if a_file[0] != db_file.page_file_name: + db_file.page_file_name = a_file[0] + changed = True + if a_file[1] != db_file.content_type: + db_file.content_type = a_file[1] + changed = True + if changed: + db_file.save() + index += 1 + class ComicPage(models.Model): Comic = models.ForeignKey(ComicBook, on_delete=models.CASCADE) diff --git a/comic/templates/comic/read_comic.html b/comic/templates/comic/read_comic.html index 19ea9c3..615d5b6 100644 --- a/comic/templates/comic/read_comic.html +++ b/comic/templates/comic/read_comic.html @@ -5,9 +5,15 @@
-
+
{% for page in pages %} -
+
+ {% if page.content_type|first in 'image' %} + {{ page.page_file_name }} + {% else %} +

+ {% endif %} +
{% endfor %}
diff --git a/comic/tests/test_models.py b/comic/tests/test_models.py index 0552a9f..75a57eb 100644 --- a/comic/tests/test_models.py +++ b/comic/tests/test_models.py @@ -4,6 +4,7 @@ import os from django.contrib.auth.models import User from django.test import Client, TestCase +from django.urls import reverse from django.utils.http import urlsafe_base64_encode from django.conf import settings from pathlib import Path @@ -17,11 +18,11 @@ class ComicBookTests(TestCase): settings.COMIC_BOOK_VOLUME = Path(Path.cwd(), 'test_comics') User.objects.create_user("test", "test@test.com", "test") user = User.objects.first() - ComicBook.process_comic_book("test1.rar") - book = ComicBook.process_comic_book("test2.rar") + ComicBook.process_comic_book(Path("test1.rar")) + book = ComicBook.process_comic_book(Path("test2.rar")) status = ComicStatus(user=user, comic=book, last_read_page=2, unread=False) status.save() - ComicBook.process_comic_book("test4.rar") + ComicBook.process_comic_book(Path("test4.rar")) def test_comic_processing(self): book = ComicBook.objects.get(file_name="test1.rar") @@ -290,6 +291,10 @@ class ComicBookTests(TestCase): page.save() generate_directory(user) c.login(username="test", password="test") + print(reverse('get_image', args=[book.selector_string, 0])) + response = c.get(reverse('read_comic', args=[book.selector_string])) + self.assertEqual(response.status_code, 200) + response = c.get(f"/comic/read/{urlsafe_base64_encode(book.selector.bytes)}/0/img") self.assertEqual(response.status_code, 200) @@ -301,7 +306,7 @@ class ComicBookTests(TestCase): page = ComicPage.objects.get(Comic=book, index=0) dup_page = ComicPage(Comic=book, index=0, page_file_name=page.page_file_name, content_type=page.content_type) dup_page.save() - c.login(username="test", password="test") + response = c.get(f"/comic/read/{urlsafe_base64_encode(book.selector.bytes)}/") response = c.get(f"/comic/read/{urlsafe_base64_encode(book.selector.bytes)}/0/img") self.assertEqual(response.status_code, 200) diff --git a/comic/urls.py b/comic/urls.py index 8cab36e..42c27be 100644 --- a/comic/urls.py +++ b/comic/urls.py @@ -4,21 +4,21 @@ from django.urls import path from . import feeds, views urlpatterns = [ - url(r"^$", views.comic_list, name="index"), - url(r"^settings/$", views.settings_page, name="settings"), - url(r"^settings/users/$", views.users_page, name="users"), - url(r"^settings/users/(?P[0-9]+)/$", views.user_config_page, name="user_details"), - url(r"^settings/users/add/$", views.user_add_page, name="add_users"), - url(r"^account/$", views.account_page, name="account"), - url(r"^read/(?P[\w-]+)/$", views.read_comic, name="read_comic"), - url(r"^set_page/(?P[\w-]+)/(?P[0-9]+)/$", views.set_read_page, name="set_read_page"), - url(r"^read/(?P[\w-]+)/(?P[0-9]+)/img$", views.get_image, name="get_image"), - url(r"^read/(?P[\w-]+)/pdf$", views.get_pdf, name="get_pdf"), - url(r"^list_json/$", views.comic_list_json, name="comic_list_json1"), - url(r"^list_json/(?P[\w-]+)/$", views.comic_list_json, name="comic_list_json2"), - url(r"^recent/$", views.recent_comics, name="recent_comics"), - url(r"^recent/json/$", views.recent_comics_json, name="recent_comics_json"), - url(r"^edit/$", views.comic_edit, name="comic_edit"), - url(r"^feed/(?P[\w-]+)/$", feeds.RecentComics()), - url(r"^(?P[\w-]+)/$", views.comic_list, name="comic_list"), + path("", views.comic_list, name="index"), + path("settings/", views.settings_page, name="settings"), + path("settings/users/", views.users_page, name="users"), + path("settings/users//", views.user_config_page, name="user_details"), + path("settings/users/add/", views.user_add_page, name="add_users"), + path("account/", views.account_page, name="account"), + path("read//", views.read_comic, name="read_comic"), + path("set_page///", views.set_read_page, name="set_read_page"), + path("read///img", views.get_image, name="get_image"), + path("read//pdf", views.get_pdf, name="get_pdf"), + path("list_json/", views.comic_list_json, name="comic_list_json1"), + path("list_json//", views.comic_list_json, name="comic_list_json2"), + path("recent/", views.recent_comics, name="recent_comics"), + path("recent/json/", views.recent_comics_json, name="recent_comics_json"), + path("edit/", views.comic_edit, name="comic_edit"), + path("feed//", feeds.RecentComics()), + path("/", views.comic_list, name="comic_list"), ] diff --git a/comic/views.py b/comic/views.py index 062e4c2..f448550 100644 --- a/comic/views.py +++ b/comic/views.py @@ -9,6 +9,7 @@ from django.db.transaction import atomic from django.http import HttpResponse, FileResponse from django.shortcuts import get_object_or_404, redirect, render from django.utils.http import urlsafe_base64_decode, urlsafe_base64_encode +from django.views.decorators.clickjacking import xframe_options_sameorigin from django.views.decorators.csrf import ensure_csrf_cookie from django.views.decorators.http import require_POST @@ -290,8 +291,10 @@ def settings_page(request): return render(request, "comic/settings_page.html", context) + @login_required def read_comic(request, comic_selector): + selector = uuid.UUID(bytes=urlsafe_base64_decode(comic_selector)) try: book = ComicBook.objects.get(selector=selector) @@ -309,7 +312,6 @@ def read_comic(request, comic_selector): context = { "book": book, "pages": pages, - # "orig_file_name": book.page_name(page), "nav": book.nav(request.user), "status": status, "breadcrumbs": generate_breadcrumbs_from_path(book.directory, book), @@ -320,6 +322,8 @@ def read_comic(request, comic_selector): context['status'].last_read_page += 1 return render(request, "comic/read_comic_pdf.html", context) else: + book.verify_pages(pages) + context['pages'] = ComicPage.objects.filter(Comic=book) return render(request, "comic/read_comic.html", context) @@ -339,6 +343,7 @@ def set_read_page(request, comic_selector, page): return HttpResponse(status=200) +@xframe_options_sameorigin @login_required def get_image(_, comic_selector, page): selector = uuid.UUID(bytes=urlsafe_base64_decode(comic_selector)) diff --git a/poetry.lock b/poetry.lock index fd858ec..d025bbf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -111,17 +111,6 @@ django = ">=2.2,<4.0" [package.extras] docs = ["sphinx (>=2.4,<3.0)", "sphinx_rtd_theme (>=0.4.3,<0.5.0)", "m2r2 (>=0.2.5,<0.3.0)"] -[[package]] -name = "django-db-mutex" -version = "2.0.0" -description = "Acquire a mutex via the DB in Django" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -Django = ">=2.2" - [[package]] name = "django-recaptcha2" version = "1.4.1" @@ -374,7 +363,7 @@ dev = ["pytest (>=4.6.2)", "black (>=19.3b0)"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "a4bc175a3b3f8918926aae3b74d4d5fee3111ddc61dcb3b37c353c0e99edc277" +content-hash = "2cf2bf8371f6fa6dcc2a9d8d889387667eed3a994e46797c5113f8ea7e7ffd95" [metadata.files] asgiref = [ @@ -468,10 +457,6 @@ django-bootstrap4 = [ {file = "django-bootstrap4-2.3.1.tar.gz", hash = "sha256:2c199020ac38866cdf8d1c5561ce7468116b9685b455a29843c0225ef8568879"}, {file = "django_bootstrap4-2.3.1-py3-none-any.whl", hash = "sha256:b68f073b647b20ec7894a252a0ca4e06b7b8dafdbad995cb0cdc783d0bb4629d"}, ] -django-db-mutex = [ - {file = "django-db-mutex-2.0.0.tar.gz", hash = "sha256:b8f3466611ac0045ec4d07f47ded159d08a3780ebc95c48c86af3c909fb2e3e6"}, - {file = "django_db_mutex-2.0.0-py2.py3-none-any.whl", hash = "sha256:6d41176f4094c7a32c0ad6d157a0c268f23ca747ee5f32eadf7a18efe369715e"}, -] django-recaptcha2 = [ {file = "django-recaptcha2-1.4.1.tar.gz", hash = "sha256:c0b43851b05c6bf6ebb5ecc890c13ccedacd9bb33d64b4291c74dd6fcbc89366"}, {file = "django_recaptcha2-1.4.1-py3-none-any.whl", hash = "sha256:9ea90db0cec502741be1066c09ec1b8e02a73162a319a042e78e67c4605087af"}, diff --git a/pyproject.toml b/pyproject.toml index 1bd9c2e..d1a4ecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ mysqlclient = "^2.0.1" psycopg2 = "^2.8.6" rarfile = "^4.0" coverage = "^5.5" -django-db-mutex = "^2.0.0" [tool.poetry.dev-dependencies]