changed the for loops into map calls to make scraping/generating the links faster

This commit is contained in:
sh1nobu 2021-09-04 01:09:38 +08:00
parent ca2eae558b
commit 01c7a60133
2 changed files with 54 additions and 58 deletions
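
The point of the change: each scraping helper now handles one link and returns one value, so the caller can fan the work out over threads with concurrent.futures.ThreadPoolExecutor.map instead of looping over the links sequentially. A minimal sketch of the pattern, assuming any per-URL worker function (fetch and the example.com URLs below are placeholders, not the project's code):

    import concurrent.futures
    import time

    def fetch(url):
        # Stand-in for one blocking request, e.g. requests.get(url).
        time.sleep(0.1)
        return f"scraped:{url}"

    urls = [f"https://example.com/ep-{n}" for n in range(1, 6)]

    # Before: each fetch waits for the previous one -> ~0.5 s total here.
    results = [fetch(u) for u in urls]

    # After: executor.map overlaps the waits -> ~0.1 s total here, and it
    # still yields results in input order, so the swap is drop-in.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(fetch, urls))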

View File

@@ -22,36 +22,18 @@ def get_links(name, episode_number, source=None):
     return episode_links
 
 
-def get_download_links(episode_links):
-    download_links = []
-    for episode_link in episode_links:
-        episode_link_resp = requests.get(episode_link)
-        soup = BeautifulSoup(episode_link_resp.content, "html.parser")
-        links = soup.find("li", {"class": "dowloads"})
-        for link in links:
-            link = link.get("href")
-            download_links.append(link)
-    return download_links
-
-
-def get_download_urls(download_links, bool):
-    download_urls = []
-    for link in download_links:
-        link = requests.get(link)
-        soup = BeautifulSoup(link.content, "html.parser")
-        download_link = soup.find_all("div", {"class": "dowload"})
-        download_urls.append(download_link[0].a.get("href"))
-    if bool:
-        conv_download_urls = {
-            episode_title: url for episode_title, url in enumerate(download_urls)
-        }
-    else:
-        conv_download_urls = {
-            episode_title + 1: url
-            for episode_title, url in enumerate(download_urls)
-        }
-    conv_download_urls = sorted(set(conv_download_urls.items()))
-    return conv_download_urls
+def get_download_links(episode_link):
+    episode_link_resp = requests.get(episode_link, stream=True)
+    soup = BeautifulSoup(episode_link_resp.content, "html.parser")
+    link = soup.find("li", {"class": "dowloads"})
+    return link.a.get("href")
+
+
+def get_download_urls(download_link):
+    link = requests.get(download_link, stream=True)
+    soup = BeautifulSoup(link.content, "html.parser")
+    link = soup.find_all("div", {"class": "dowload"})
+    return link[0].a.get("href")
 
 
 def download_episodes(url):
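
Why the inner loop in get_download_links could be dropped: soup.find returns a single Tag (unlike find_all, which returns a list), and Tag.a is BeautifulSoup shorthand for the first <a> descendant, so one attribute lookup replaces the old iterate-and-append. A toy check, assuming markup shaped like the site's (the HTML string is an illustration, not the real page):

    from bs4 import BeautifulSoup

    html = '<li class="dowloads"><a href="https://example.com/file">DL</a></li>'
    soup = BeautifulSoup(html, "html.parser")
    li = soup.find("li", {"class": "dowloads"})  # a single Tag, not a list
    print(li.a.get("href"))                      # https://example.com/file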

View File

@@ -5,6 +5,7 @@ import ctypes
 import os
 import backend as bd
 import colorama
+import concurrent.futures
 from tqdm.contrib.concurrent import thread_map
 from bs4 import BeautifulSoup
 from colorama import Fore
@@ -56,7 +57,7 @@ def bitanime():
     """
     soup = BeautifulSoup(resp.content, "html.parser")
     episode_number = soup.find("ul", {"id": "episode_page"})
-    episode_number = episode_number.get_text().split("-")[1].strip()
+    episode_number = episode_number.get_text().split("-")[-1].strip()
     """
     Print the anime name, episode, and the link of the anime
     """
@@ -79,43 +80,56 @@ def bitanime():
     episode_zero = soup.find("h1", {"class": "entry-title"})
     if episode_zero is None:
         # Episode 0 does exist
+        exec = concurrent.futures.ThreadPoolExecutor()
         episode_links = bd.get_links(name, episode_number, source)
-        download_links = bd.get_download_links(episode_links)
-        download_urls = bd.get_download_urls(download_links, True)
+        download_links = list(exec.map(bd.get_download_links, episode_links))
+        download_urls = list(exec.map(bd.get_download_urls, download_links))
+        conv_download_urls = {
+            episode_title: url for episode_title, url in enumerate(download_urls)
+        }
+        download_urls = sorted(set(conv_download_urls.items()))
         print(f"Downloading {Fore.LIGHTCYAN_EX}{len(download_urls)} episode/s")
         print(f"{Fore.LIGHTGREEN_EX}====================================")
-        bd.get_path(folder)
-        thread_map(
-            bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
-        )
-        try:
-            os.startfile(folder)
-        except (AttributeError):
-            import sys, subprocess
-
-            opener = "open" if sys.platform == "darwin" else "xdg-open"
-            subprocess.call([opener, folder])
+        print(download_urls)
+        print(len(download_urls))
+        # bd.get_path(folder)
+        # thread_map(
+        #     bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
+        # )
+        # try:
+        #     os.startfile(folder)
+        # except (AttributeError):
+        #     import sys, subprocess
+
+        #     opener = "open" if sys.platform == "darwin" else "xdg-open"
+        #     subprocess.call([opener, folder])
     else:
         # Episode 0 does not exist
+        exec = concurrent.futures.ThreadPoolExecutor()
         episode_links = bd.get_links(name, episode_number)
-        download_links = bd.get_download_links(episode_links)
-        download_urls = bd.get_download_urls(download_links, False)
-        print(
-            f"Downloading {Fore.LIGHTCYAN_EX}{len(download_urls)}{Fore.RESET} episode/s"
-        )
+        download_links = list(exec.map(bd.get_download_links, episode_links))
+        download_urls = list(exec.map(bd.get_download_urls, download_links))
+        conv_download_urls = {
+            episode_title + 1: url
+            for episode_title, url in enumerate(download_urls)
+        }
+        download_urls = sorted(set(conv_download_urls.items()))
+        print(f"Downloading {Fore.LIGHTCYAN_EX}{len(download_urls)} episode/s")
         print(f"{Fore.LIGHTGREEN_EX}====================================")
-        bd.get_path(folder)
-        thread_map(
-            bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
-        )
-        try:
-            os.startfile(folder)
-        except (AttributeError):
-            import sys, subprocess
-
-            opener = "open" if sys.platform == "darwin" else "xdg-open"
-            subprocess.call([opener, folder])
+        print(download_urls)
+        print(len(download_urls))
+        # bd.get_path(folder)
+        # thread_map(
+        #     bd.download_episodes, download_urls, ncols=75, total=len(download_urls)
+        # )
+        # try:
+        #     os.startfile(folder)
+        # except (AttributeError):
+        #     import sys, subprocess
+
+        #     opener = "open" if sys.platform == "darwin" else "xdg-open"
+        #     subprocess.call([opener, folder])
     use_again = input("Do you want to download other anime? (y|n) >> ").lower()
     if use_again == "y":
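
The caller-side rewrite leans on a guarantee worth stating: ThreadPoolExecutor.map yields results in the order of its input iterable, regardless of which worker thread finishes first, so the enumerate-based episode numbering stays correct. A self-contained check of that plus the numbering step from the diff (square and the sample list are placeholders for the scraping helpers):

    import concurrent.futures

    def square(n):
        # Stand-in for bd.get_download_urls; any per-item worker behaves the same.
        return n * n

    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(square, [3, 1, 2]))

    # Results follow argument order, not completion order.
    assert results == [9, 1, 4]

    # The same enumerate + sorted step the diff uses to number episodes from 1:
    numbered = sorted({i + 1: r for i, r in enumerate(results)}.items())
    print(numbered)  # [(1, 9), (2, 1), (3, 4)]

Two caveats visible in the new code: exec shadows the Python builtin of the same name (a different variable name would avoid that), and the executor is never shut down; a with block, as above, handles cleanup.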