changed the for loops into map calls to make scraping/generating the links faster

sh1nobu 2021-09-04 01:09:38 +08:00
parent ca2eae558b
commit 01c7a60133
2 changed files with 54 additions and 58 deletions
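The heart of the change is replacing a sequential per-link for loop with `concurrent.futures.ThreadPoolExecutor.map`, so the blocking HTTP requests overlap instead of running one after another. A minimal sketch of that pattern (the `fetch_title` worker and the example.com URLs are hypothetical, not from this repo):

import concurrent.futures

import requests
from bs4 import BeautifulSoup


# Hypothetical per-item worker: fetch one page, extract one value.
# The commit reshapes get_download_links/get_download_urls the same way.
def fetch_title(url):
    resp = requests.get(url)
    soup = BeautifulSoup(resp.content, "html.parser")
    return soup.title.get_text(strip=True)


urls = [f"https://example.com/page-{n}" for n in range(1, 4)]  # placeholders

# Before: one blocking request per iteration.
titles = [fetch_title(url) for url in urls]

# After: the same worker fanned out over a thread pool. executor.map
# returns results in input order, so enumerate()-based numbering still works.
with concurrent.futures.ThreadPoolExecutor() as executor:
    titles = list(executor.map(fetch_title, urls))

Note that the commit itself keeps a bare `exec = concurrent.futures.ThreadPoolExecutor()`, which shadows the `exec` builtin and is never shut down; the `with` form above closes the pool automatically.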

View File

@@ -22,36 +22,18 @@ def get_links(name, episode_number, source=None):
     return episode_links


-def get_download_links(episode_links):
-    download_links = []
-    for episode_link in episode_links:
-        episode_link_resp = requests.get(episode_link)
-        soup = BeautifulSoup(episode_link_resp.content, "html.parser")
-        links = soup.find("li", {"class": "dowloads"})
-        for link in links:
-            link = link.get("href")
-            download_links.append(link)
-    return download_links
+def get_download_links(episode_link):
+    episode_link_resp = requests.get(episode_link, stream=True)
+    soup = BeautifulSoup(episode_link_resp.content, "html.parser")
+    link = soup.find("li", {"class": "dowloads"})
+    return link.a.get("href")


-def get_download_urls(download_links, bool):
-    download_urls = []
-    for link in download_links:
-        link = requests.get(link)
-        soup = BeautifulSoup(link.content, "html.parser")
-        download_link = soup.find_all("div", {"class": "dowload"})
-        download_urls.append(download_link[0].a.get("href"))
-    if bool:
-        conv_download_urls = {
-            episode_title: url for episode_title, url in enumerate(download_urls)
-        }
-    else:
-        conv_download_urls = {
-            episode_title + 1: url
-            for episode_title, url in enumerate(download_urls)
-        }
-    conv_download_urls = sorted(set(conv_download_urls.items()))
-    return conv_download_urls
+def get_download_urls(download_link):
+    link = requests.get(download_link, stream=True)
+    soup = BeautifulSoup(link.content, "html.parser")
+    link = soup.find_all("div", {"class": "dowload"})
+    return link[0].a.get("href")


 def download_episodes(url):
@@ -65,4 +47,4 @@ def download_episodes(url):
     url_resp = requests.get(url[1], headers=header, stream=True)
     file_name = os.path.join(folder_path, f"{url[0]}.mp4")
     with open(file_name, "wb") as file:
-        shutil.copyfileobj(url_resp.raw, file)
+        shutil.copyfileobj(url_resp.raw, file)
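With each helper now taking a single link and returning a single URL, the caller chains them as two map stages. A sketch of that composition under the same assumptions (placeholder episode URLs; `backend` is the module patched above):

import concurrent.futures

import backend as bd  # the module changed in this diff

# Placeholder inputs; in the real flow these come from bd.get_links().
episode_links = [
    "https://example.com/anime-episode-1",
    "https://example.com/anime-episode-2",
]

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Stage 1: each episode page -> its download page (the li.dowloads anchor).
    download_links = list(executor.map(bd.get_download_links, episode_links))
    # Stage 2: each download page -> the direct file URL (the div.dowload anchor).
    download_urls = list(executor.map(bd.get_download_urls, download_links))

The episode-numbering dict that `get_download_urls` used to build now has to live in the caller, which is exactly what the second file below does.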

View File

@@ -5,6 +5,7 @@ import ctypes
 import os
 import backend as bd
 import colorama
+import concurrent.futures
 from tqdm.contrib.concurrent import thread_map
 from bs4 import BeautifulSoup
 from colorama import Fore
@@ -56,7 +57,7 @@ def bitanime():
     """
     soup = BeautifulSoup(resp.content, "html.parser")
     episode_number = soup.find("ul", {"id": "episode_page"})
-    episode_number = episode_number.get_text().split("-")[1].strip()
+    episode_number = episode_number.get_text().split("-")[-1].strip()
     """
     Print the anime name, episode, and the link of the anime
     """
@@ -79,43 +80,56 @@ def bitanime():
     episode_zero = soup.find("h1", {"class": "entry-title"})
     if episode_zero is None:
         # Episode 0 does exist
+        exec = concurrent.futures.ThreadPoolExecutor()
         episode_links = bd.get_links(name, episode_number, source)
-        download_links = bd.get_download_links(episode_links)
-        download_urls = bd.get_download_urls(download_links, True)
+        download_links = list(exec.map(bd.get_download_links, episode_links))
+        download_urls = list(exec.map(bd.get_download_urls, download_links))
+        conv_download_urls = {
+            episode_title: url for episode_title, url in enumerate(download_urls)
+        }
+        download_urls = sorted(set(conv_download_urls.items()))
         print(f"Downloading {Fore.LIGHTCYAN_EX}{len(download_urls)} episode/s")
         print(f"{Fore.LIGHTGREEN_EX}====================================")
-        bd.get_path(folder)
-        thread_map(
-            bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
-        )
-        try:
-            os.startfile(folder)
-        except (AttributeError):
-            import sys, subprocess
+        print(download_urls)
+        print(len(download_urls))
+        # bd.get_path(folder)
+        # thread_map(
+        #     bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
+        # )
+        # try:
+        #     os.startfile(folder)
+        # except (AttributeError):
+        #     import sys, subprocess

-            opener = "open" if sys.platform == "darwin" else "xdg-open"
-            subprocess.call([opener, folder])
+        # opener = "open" if sys.platform == "darwin" else "xdg-open"
+        # subprocess.call([opener, folder])
     else:
         # Episode 0 does not exist
+        exec = concurrent.futures.ThreadPoolExecutor()
         episode_links = bd.get_links(name, episode_number)
-        download_links = bd.get_download_links(episode_links)
-        download_urls = bd.get_download_urls(download_links, False)
-        print(
-            f"Downloading {Fore.LIGHTCYAN_EX}{len(download_urls)}{Fore.RESET} episode/s"
-        )
+        download_links = list(exec.map(bd.get_download_links, episode_links))
+        download_urls = list(exec.map(bd.get_download_urls, download_links))
+        conv_download_urls = {
+            episode_title + 1: url
+            for episode_title, url in enumerate(download_urls)
+        }
+        download_urls = sorted(set(conv_download_urls.items()))
+        print(f"Downloading {Fore.LIGHTCYAN_EX}{len(download_urls)} episode/s")
         print(f"{Fore.LIGHTGREEN_EX}====================================")
-        bd.get_path(folder)
-        thread_map(
-            bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
-        )
-        try:
-            os.startfile(folder)
-        except (AttributeError):
-            import sys, subprocess
+        print(download_urls)
+        print(len(download_urls))
+        # bd.get_path(folder)
+        # thread_map(
+        #     bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
+        # )
+        # try:
+        #     os.startfile(folder)
+        # except (AttributeError):
+        #     import sys, subprocess

-            opener = "open" if sys.platform == "darwin" else "xdg-open"
-            subprocess.call([opener, folder])
+        # opener = "open" if sys.platform == "darwin" else "xdg-open"
+        # subprocess.call([opener, folder])

     use_again = input("Do you want to download other anime? (y|n) >> ").lower()
     if use_again == "y":
@@ -126,4 +140,4 @@ def bitanime():
 if __name__ == "__main__":
-    bitanime()
+    bitanime()