From 060bb6f64f60120126818a5c2bfac31a309363ed Mon Sep 17 00:00:00 2001
From: Ajurna
Date: Tue, 25 May 2021 08:58:08 +0100
Subject: [PATCH] working alpha

---
NOTE(review): main.py below was revised during review. Its hunk header and
the diffstat were updated to match; the blob id on its index line and the
commit id in the first line still describe the original commit.

 .gitignore     |   1 +
 main.py        | 109 ++++++++++++++++++++++++++++++++++
 poetry.lock    | 157 +++++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml |  17 ++++++
 4 files changed, 284 insertions(+)
 create mode 100644 main.py
 create mode 100644 poetry.lock
 create mode 100644 pyproject.toml

diff --git a/.gitignore b/.gitignore
index 7f7cccc..68c25ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,3 +58,4 @@ docs/_build/
 
 # PyBuilder
 target/
+.idea
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..08be0e9
--- /dev/null
+++ b/main.py
@@ -0,0 +1,109 @@
+"""Download issues of a comic from readcomicsonline.ru into .cbz archives."""
+import time
+from pathlib import Path
+from urllib.parse import urlparse
+import requests
+from zipfile import ZipFile, ZipInfo
+from bs4 import BeautifulSoup
+import cloudscraper
+
+# Seconds to wait on any single HTTP request before giving up.
+REQUEST_TIMEOUT = 60
+# Seconds to pause after each issue.
+# NOTE(review): the indentation of the original ``time.sleep(15)`` was lost
+# when this patch was whitespace-collapsed; it is assumed to run once per
+# issue rather than once per image -- confirm.
+ISSUE_DELAY = 15
+
+# Shared HTTP session used for the issue pages and their images.
+s = cloudscraper.create_scraper()
+
+
+def get_html():
+    """Save the HTML of one hard-coded issue page to ``blah.html``.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status.
+    """
+    req = requests.get('https://readcomicsonline.ru/comic/deadly-class-2014/40',
+                       timeout=REQUEST_TIMEOUT)
+    # Fail loudly rather than saving an error page as if it were the issue.
+    req.raise_for_status()
+    with open('blah.html', 'wb') as f:
+        f.write(req.content)
+
+
+def download_comic():
+    """Download pages 1-31 of one hard-coded chapter into ``file.cbz``.
+
+    Raises:
+        requests.HTTPError: if any page request returns an error status.
+    """
+    with ZipFile('file.cbz', mode='w') as z:
+        for page in range(1, 32):
+            print(page)
+            req = requests.get(
+                'https://readcomicsonline.ru/uploads/manga/deadly-class-2014/'
+                f'chapters/40/{page:02}.jpg',
+                timeout=REQUEST_TIMEOUT,
+            )
+            # An error body must not be stored under a .jpg name.
+            req.raise_for_status()
+            z.writestr(f'{page}.jpg', req.content)
+
+
+name = 'Faster Than Light'
+issues_to_get = range(1, 11)
+baseurl = 'https://readcomicsonline.ru/comic/faster-than-light-2015'
+
+
+def _page_image_urls(soup):
+    """Return the stripped ``data-src`` of each img tag in ``div.imagecnt``.
+
+    Images without a non-empty ``data-src`` are left out.  An empty list is
+    returned when the container div is missing, so the caller never calls a
+    method on ``None``.
+    """
+    container = soup.find("div", class_="imagecnt")
+    if container is None:
+        return []
+    sources = (img.attrs.get('data-src', '').strip()
+               for img in container.find_all('img'))
+    return [src for src in sources if src]
+
+
+def _archive_member_name(image_url):
+    """Return the file-name part of *image_url*, ignoring any query string."""
+    return Path(urlparse(image_url).path).name
+
+
+def _download_issue(issue_number):
+    """Fetch one issue page and pack its images into ``{name} NNN.cbz``.
+
+    Failed requests are reported on stdout and skipped; no archive is created
+    when the page yields no image URLs.
+    """
+    issue_url = f'{baseurl.rstrip("/")}/{issue_number}/'
+    issue_page = s.get(issue_url, timeout=REQUEST_TIMEOUT)
+    if not issue_page.ok:
+        print(f'skipping issue {issue_number}: HTTP {issue_page.status_code}')
+        return
+    soup = BeautifulSoup(issue_page.content, 'html.parser')
+    image_urls = _page_image_urls(soup)
+    if not image_urls:
+        print(f'skipping issue {issue_number}: no images found')
+        return
+    with ZipFile(f'{name} {issue_number:03}.cbz', mode='w') as z:
+        for image_url_text in image_urls:
+            print(image_url_text)
+            image = s.get(image_url_text, timeout=REQUEST_TIMEOUT)
+            if not image.ok:
+                print(f'skipping image: HTTP {image.status_code}')
+                continue
+            z.writestr(_archive_member_name(image_url_text), image.content)
+
+
+if __name__ == '__main__':
+    for issue_number in issues_to_get:
+        _download_issue(issue_number)
+        time.sleep(ISSUE_DELAY)
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..bcb81e9
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,157 @@
+[[package]]
+name = "beautifulsoup4"
+version = "4.9.3"
+description = "Screen-scraping library"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+soupsieve = {version = ">1.2", markers = "python_version >= \"3.0\""}
+
+[package.extras]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
+[[package]]
+name = "certifi"
+version = "2020.12.5"
+description = "Python package for providing Mozilla's CA Bundle."
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "chardet"
+version = "4.0.0"
+description = "Universal encoding detector for Python 2 and 3"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "cloudscraper"
+version = "1.2.58"
+description = "A Python module to bypass Cloudflare's anti-bot page."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+pyparsing = ">=2.4.7"
+requests = ">=2.9.2"
+requests-toolbelt = ">=0.9.1"
+
+[[package]]
+name = "idna"
+version = "2.10"
+description = "Internationalized Domain Names in Applications (IDNA)"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "pyparsing"
+version = "2.4.7"
+description = "Python parsing module"
+category = "main"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "requests"
+version = "2.25.1"
+description = "Python HTTP for Humans."
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +certifi = ">=2017.4.17" +chardet = ">=3.0.2,<5" +idna = ">=2.5,<3" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] + +[[package]] +name = "requests-toolbelt" +version = "0.9.1" +description = "A utility belt for advanced users of python-requests" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "soupsieve" +version = "2.2.1" +description = "A modern CSS selector implementation for Beautiful Soup." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "urllib3" +version = "1.26.4" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +brotli = ["brotlipy (>=0.6.0)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.9" +content-hash = "29661d3c97777a96755c64dbba2e3c5f35b3f265a34f0887a5ab3d86ceadf423" + +[metadata.files] +beautifulsoup4 = [ + {file = "beautifulsoup4-4.9.3-py2-none-any.whl", hash = "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35"}, + {file = "beautifulsoup4-4.9.3-py3-none-any.whl", hash = "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"}, + {file = "beautifulsoup4-4.9.3.tar.gz", hash = "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25"}, +] +certifi = [ + {file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"}, + {file = 
"certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"}, +] +chardet = [ + {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, + {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, +] +cloudscraper = [ + {file = "cloudscraper-1.2.58-py2.py3-none-any.whl", hash = "sha256:674fd739f9412188aae8d6614e3e6316939fc0670ef5646abd3d316f1a59d3c2"}, + {file = "cloudscraper-1.2.58.tar.gz", hash = "sha256:dda29028c5628b5ba3e4dc43816ed38fd46bd945ef938c420f185586a6d8dff2"}, +] +idna = [ + {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, + {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, +] +pyparsing = [ + {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, + {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, +] +requests = [ + {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, + {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, +] +requests-toolbelt = [ + {file = "requests-toolbelt-0.9.1.tar.gz", hash = "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"}, + {file = "requests_toolbelt-0.9.1-py2.py3-none-any.whl", hash = "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f"}, +] +soupsieve = [ + {file = "soupsieve-2.2.1-py3-none-any.whl", hash = "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"}, + {file = "soupsieve-2.2.1.tar.gz", hash = 
"sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc"}, +] +urllib3 = [ + {file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"}, + {file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4e04ef6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,17 @@ +[tool.poetry] +name = "comic_downloader" +version = "0.1.0" +description = "" +authors = ["Ajurna "] + +[tool.poetry.dependencies] +python = "^3.9" +requests = "^2.25.1" +beautifulsoup4 = "^4.9.3" +cloudscraper = "^1.2.58" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api"