Merge pull request #7 from karl0ss/reworked

add support for single file and recursive folder
This commit is contained in:
Karl0ss 2024-07-23 16:00:21 +01:00 committed by GitHub
commit 3cac2ef456
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 83 additions and 33 deletions

View File

@ -15,16 +15,6 @@ def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter formatter_class=argparse.ArgumentDefaultsHelpFormatter
) )
# parser.add_argument(
# "--audio_channel", default="0", type=int, help="audio channel index to use"
# )
# parser.add_argument(
# "--sample_interval",
# type=str2timeinterval,
# default=None,
# help="generate subtitles for a specific \
# fragment of the video (e.g. 01:02:05-01:03:45)",
# )
parser.add_argument( parser.add_argument(
"--model", "--model",
default="small", default="small",
@ -55,6 +45,18 @@ def main():
# help="Type to use for computation. \ # help="Type to use for computation. \
# See https://opennmt.net/CTranslate2/quantization.html.", # See https://opennmt.net/CTranslate2/quantization.html.",
# ) # )
parser.add_argument(
"--file",
type=str,
default=None,
help="Process a single file"
)
parser.add_argument(
"--folder",
type=str,
default=None,
help="Process all videos in folder"
)
parser.add_argument( parser.add_argument(
"--show", "--show",
type=str, type=str,
@ -70,7 +72,12 @@ def main():
help="What is the origin language of the video? \ help="What is the origin language of the video? \
If unset, it is detected automatically.", If unset, it is detected automatically.",
) )
parser.add_argument(
"--backend",
type=str,
default="whisper",
choices=["whisper", "faster_whisper"],
)
args = parser.parse_args().__dict__ args = parser.parse_args().__dict__
process(args) process(args)

View File

@ -6,28 +6,39 @@ from utils.files import filename, write_srt
from utils.ffmpeg import get_audio, add_subtitles_to_mp4 from utils.ffmpeg import get_audio, add_subtitles_to_mp4
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
from utils.sonarr import update_show_in_sonarr from utils.sonarr import update_show_in_sonarr
# from utils.faster_whisper import WhisperAI from utils.faster_whisper import WhisperAI as fasterWhisperAI
from utils.whisper import WhisperAI from utils.whisper import WhisperAI
from utils.decorator import measure_time from utils.decorator import measure_time
def process(args: dict):
model_name: str = args.pop("model")
language: str = args.pop("language")
show: str = args.pop("show")
if model_name.endswith(".en"):
warnings.warn(
f"{model_name} is an English-only model, forcing English detection."
)
args["language"] = "en"
# if translate task used and language argument is set, then use it
elif language != "auto":
args["language"] = language
model_args = {}
def folder_flow(folder, model_args, args, backend):
    """Generate and attach subtitles for every file found under ``folder``.

    The directory tree is walked recursively. Each file is handled on its
    own: if anything fails for a file, the error is printed and the run
    moves on to the next one.

    Args:
        folder: Root directory to scan. A trailing path separator is not
            required.
        model_args: Forwarded unchanged to ``get_subtitles``.
        args: Forwarded unchanged to ``get_subtitles`` as its transcribe
            arguments.
        backend: Backend name forwarded to ``get_subtitles``.

    Raises:
        NotADirectoryError: If ``folder`` is not an existing directory.
    """
    print(f"Processing {folder}")
    # os.walk() silently yields nothing for a missing directory, so check
    # up front to keep a mistyped path from looking like an empty folder.
    if not os.path.isdir(folder):
        raise NotADirectoryError(f"not a directory: {folder}")

    # Build the full list before processing anything, in case processing
    # writes new files into the tree being scanned.
    paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(folder)
        for name in sorted(names)
    ]

    for path in paths:
        print(f"processing {path}")
        try:
            audios = get_audio([path], 0, None)
            subtitles = get_subtitles(
                audios, tempfile.gettempdir(), model_args, args, backend
            )
            add_subtitles_to_mp4(subtitles)
            # Pause before moving on to the next file.
            time.sleep(5)
        except Exception as ex:
            # Best effort: one bad file must not abort the whole folder.
            print(f"skipping file due to - {ex}")
def file_flow(show, model_args, args, backend):
    """Generate subtitles for a single media file and attach them to it.

    Any exception raised while processing is printed and swallowed, so the
    caller never sees a failure from this function.

    Args:
        show: Path of the file to process (``process`` passes the value of
            its ``file`` argument here).
        model_args: Forwarded unchanged to ``get_subtitles``.
        args: Forwarded unchanged to ``get_subtitles`` as its transcribe
            arguments.
        backend: Backend name forwarded to ``get_subtitles``.
    """
    print(f"Processing {show}")
    try:
        audio_by_source = get_audio([show], 0, None)
        srt_by_source = get_subtitles(
            audio_by_source,
            tempfile.gettempdir(),
            model_args,
            args,
            backend,
        )
        add_subtitles_to_mp4(srt_by_source)
        # Pause once the subtitles have been attached.
        time.sleep(5)
    except Exception as err:
        # Best effort: report the problem and carry on.
        print(f"skipping file due to - {err}")
def bazzar_flow(show, model_args, args, backend):
list_of_episodes_needing_subtitles = get_wanted_episodes(show) list_of_episodes_needing_subtitles = get_wanted_episodes(show)
print( print(
f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles." f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
@ -37,7 +48,7 @@ def process(args: dict):
episode_data = get_episode_details(episode["sonarrEpisodeId"]) episode_data = get_episode_details(episode["sonarrEpisodeId"])
try: try:
audios = get_audio([episode_data["path"]], 0, None) audios = get_audio([episode_data["path"]], 0, None)
subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args) subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args, backend)
add_subtitles_to_mp4(subtitles) add_subtitles_to_mp4(subtitles)
update_show_in_sonarr(episode["sonarrSeriesId"]) update_show_in_sonarr(episode["sonarrSeriesId"])
@ -46,12 +57,15 @@ def process(args: dict):
except Exception as ex: except Exception as ex:
print(f"skipping file due to - {ex}") print(f"skipping file due to - {ex}")
@measure_time @measure_time
def get_subtitles( def get_subtitles(
audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict, backend: str
): ):
model = WhisperAI(model_args, transcribe_args) if backend == 'whisper':
model = WhisperAI(model_args, transcribe_args)
else:
model = fasterWhisperAI(model_args, transcribe_args)
subtitles_path = {} subtitles_path = {}
for path, audio_path in audio_paths.items(): for path, audio_path in audio_paths.items():
@ -66,3 +80,32 @@ def get_subtitles(
subtitles_path[path] = srt_path subtitles_path[path] = srt_path
return subtitles_path return subtitles_path
def process(args: dict):
    """Dispatch parsed command-line arguments to the matching subtitle flow.

    ``args`` is consumed in place: the ``model``, ``language``, ``show``,
    ``file``, ``folder``, ``backend`` and ``device`` entries are popped, and
    whatever remains is handed to the selected flow as the transcription
    arguments.

    Flow selection (first match wins):
        1. ``file`` is truthy   -> ``file_flow``
        2. ``folder`` is truthy -> ``folder_flow``
        3. otherwise            -> ``bazzar_flow`` with ``show``

    Raises:
        KeyError: If any of the popped keys is missing from ``args``.
    """
    model_name, language, show, file, folder, backend = (
        args.pop(key)
        for key in ("model", "language", "show", "file", "folder", "backend")
    )

    english_only = model_name.endswith(".en")
    if english_only:
        warnings.warn(
            f"{model_name} is an English-only model, forcing English detection."
        )
        language = "en"
    # A language of "auto" is not forwarded at all; any other value (or the
    # forced "en" above) is passed on through the transcription arguments.
    if english_only or language != "auto":
        args["language"] = language

    # NOTE(review): model_name is only used for the ".en" check here and is
    # not added to model_args - confirm the backends get the model elsewhere.
    model_args = {"device": args.pop("device")}

    if file:
        file_flow(file, model_args, args, backend)
        return
    if folder:
        folder_flow(folder, model_args, args, backend)
        return
    bazzar_flow(show, model_args, args, backend)