From fde1b4d89ef404fd6bd0b326e6d861e9ab174303 Mon Sep 17 00:00:00 2001
From: Karl
Date: Sat, 13 Jul 2024 09:22:02 +0000
Subject: [PATCH 1/4] add cuda deps

---
 .vscode/launch.json                      |  4 +-
 bazarr-ai-sub-generator/main.py          | 14 ++++++-
 bazarr-ai-sub-generator/utils/files.py   |  6 +--
 bazarr-ai-sub-generator/utils/whisper.py | 51 ++++++++++++++++--------
 requirements.txt                         |  3 +-
 5 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 94c9cc5..a5131f3 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -5,8 +5,8 @@
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Python: Current File",
-            "type": "python",
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index 64074a8..d5202fa 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -8,8 +8,20 @@ from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
 from utils.whisper import WhisperAI
 
+def measure_time(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        duration = end_time - start_time
+        print(f"Function '{func.__name__}' executed in: {duration:.6f} seconds")
+        return result
+    return wrapper
+
+
 def process(args: dict):
+
     model_name: str = args.pop("model")
     language: str = args.pop("language")
     sample_interval: str = args.pop("sample_interval")
     audio_channel: str = args.pop("audio_channel")
@@ -44,7 +56,7 @@ def process(args: dict):
             time.sleep(5)
         sync_series()
 
-
+@measure_time
 def get_subtitles(
     audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
 ):
diff --git a/bazarr-ai-sub-generator/utils/files.py b/bazarr-ai-sub-generator/utils/files.py
index ea40253..29faa08 100644
--- a/bazarr-ai-sub-generator/utils/files.py
+++ b/bazarr-ai-sub-generator/utils/files.py
@@ -7,9 +7,9 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
     for i, segment in enumerate(transcript, start=1):
         print(
             f"{i}\n"
-            f"{format_timestamp(segment.start, always_include_hours=True)} --> "
-            f"{format_timestamp(segment.end, always_include_hours=True)}\n"
-            f"{segment.text.strip().replace('-->', '->')}\n",
+            f"{format_timestamp(segment['start'], always_include_hours=True)} --> "
+            f"{format_timestamp(segment['end'], always_include_hours=True)}\n"
+            f"{segment['text'].strip().replace('-->', '->')}\n",
             file=file,
             flush=True,
         )
diff --git a/bazarr-ai-sub-generator/utils/whisper.py b/bazarr-ai-sub-generator/utils/whisper.py
index 5e823b1..6db019c 100644
--- a/bazarr-ai-sub-generator/utils/whisper.py
+++ b/bazarr-ai-sub-generator/utils/whisper.py
@@ -1,9 +1,9 @@
 import warnings
-import faster_whisper
+import torch
+import whisper
 from tqdm import tqdm
 
 
-# pylint: disable=R0903
 class WhisperAI:
     """
     Wrapper class for the Whisper speech recognition model with additional functionality.
@@ -23,23 +23,35 @@ class WhisperAI:
     ```
 
     Args:
-    - model_args: Arguments to pass to WhisperModel initialize method
-        - model_size_or_path (str): The name of the Whisper model to use.
-        - device (str): The device to use for computation ("cpu", "cuda", "auto").
-        - compute_type (str): The type to use for computation.
-            See https://opennmt.net/CTranslate2/quantization.html.
+    - model_args (dict): Arguments to pass to Whisper model initialization
+        - model_size (str): The name of the Whisper model to use.
+        - device (str): The device to use for computation ("cpu" or "cuda").
     - transcribe_args (dict): Additional arguments to pass to the transcribe method.
 
     Attributes:
-    - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
+    - model (whisper.Whisper): The underlying Whisper speech recognition model.
+    - device (torch.device): The device to use for computation.
     - transcribe_args (dict): Additional arguments used for transcribe method.
 
     Methods:
-    - transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
+    - transcribe(audio_path: str): Transcribes an audio file and yields the resulting segments.
     """
 
     def __init__(self, model_args: dict, transcribe_args: dict):
-        self.model = faster_whisper.WhisperModel(**model_args)
+        """
+        Initializes the WhisperAI instance.
+
+        Args:
+        - model_args (dict): Arguments to initialize the Whisper model.
+        - transcribe_args (dict): Additional arguments for the transcribe method.
+        """
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(device)
+        # Set device for computation
+        self.device = torch.device(device)
+        # Load the Whisper model with the specified size
+        self.model = whisper.load_model("base").to(self.device)
+        # Store the additional transcription arguments
         self.transcribe_args = transcribe_args
 
     def transcribe(self, audio_path: str):
@@ -50,17 +62,24 @@ class WhisperAI:
         - audio_path (str): The path to the audio file for transcription.
 
         Yields:
-        - faster_whisper.TranscriptionSegment: An individual transcription segment.
+        - dict: An individual transcription segment.
         """
+        # Suppress warnings during transcription
         warnings.filterwarnings("ignore")
-        segments, info = self.model.transcribe(audio_path, **self.transcribe_args)
+        # Load and transcribe the audio file
+        result = self.model.transcribe(audio_path, **self.transcribe_args)
+        # Restore default warning behavior
        warnings.filterwarnings("default")
 
-        # Same precision as the Whisper timestamps.
-        total_duration = round(info.duration, 2)
+        # Calculate the total duration from the segments
+        total_duration = max(segment["end"] for segment in result["segments"])
 
+        # Create a progress bar with the total duration of the audio file
         with tqdm(total=total_duration, unit=" seconds") as pbar:
-            for segment in segments:
+            for segment in result["segments"]:
+                # Yield each transcription segment
                 yield segment
-                pbar.update(segment.end - segment.start)
+                # Update the progress bar with the duration of the current segment
+                pbar.update(segment["end"] - segment["start"])
+            # Ensure the progress bar reaches 100% upon completion
             pbar.update(0)
diff --git a/requirements.txt b/requirements.txt
index eab95da..ec34ef1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 faster-whisper==0.10.0
 tqdm==4.56.0
-ffmpeg-python==0.2.0
\ No newline at end of file
+ffmpeg-python==0.2.0
+git+https://github.com/openai/whisper.git

From 7e83e4ef1e9cf590c454a66107c11baa8c20eb4a Mon Sep 17 00:00:00 2001
From: Karl
Date: Sat, 13 Jul 2024 09:35:19 +0000
Subject: [PATCH 2/4] cleanup and add ability to process specific show only

---
 .vscode/launch.json                     |  2 +
 bazarr-ai-sub-generator/cli.py          | 96 ++++++++++++-------------
 bazarr-ai-sub-generator/main.py         | 13 ++--
 bazarr-ai-sub-generator/utils/bazarr.py | 10 ++-
 4 files changed, 64 insertions(+), 57 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index a5131f3..6cacf66 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -14,6 +14,8 @@
             "args": [
                 "--model",
                 "base",
+                "--show",
+                "Gary Neville's Soccerbox"
             ],
         }
     ]
diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py
index d1436b3..8780f82 100644
--- a/bazarr-ai-sub-generator/cli.py
+++ b/bazarr-ai-sub-generator/cli.py
@@ -15,16 +15,16 @@ def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
-    parser.add_argument(
-        "--audio_channel", default="0", type=int, help="audio channel index to use"
-    )
-    parser.add_argument(
-        "--sample_interval",
-        type=str2timeinterval,
-        default=None,
-        help="generate subtitles for a specific \
-                        fragment of the video (e.g. 01:02:05-01:03:45)",
-    )
+    # parser.add_argument(
+    #     "--audio_channel", default="0", type=int, help="audio channel index to use"
+    # )
+    # parser.add_argument(
+    #     "--sample_interval",
+    #     type=str2timeinterval,
+    #     default=None,
+    #     help="generate subtitles for a specific \
+    #                     fragment of the video (e.g. 01:02:05-01:03:45)",
+    # )
     parser.add_argument(
         "--model",
         default="small",
@@ -38,46 +38,46 @@ def main():
         choices=["cpu", "cuda", "auto"],
         help='Device to use for computation ("cpu", "cuda", "auto")',
     )
-    parser.add_argument(
-        "--compute_type",
-        type=str,
-        default="default",
-        choices=[
-            "int8",
-            "int8_float32",
-            "int8_float16",
-            "int8_bfloat16",
-            "int16",
-            "float16",
-            "bfloat16",
-            "float32",
-        ],
-        help="Type to use for computation. \
-            See https://opennmt.net/CTranslate2/quantization.html.",
-    )
-    parser.add_argument(
-        "--beam_size",
-        type=int,
-        default=5,
-        help="model parameter, tweak to increase accuracy",
-    )
-    parser.add_argument(
-        "--no_speech_threshold",
-        type=float,
-        default=0.6,
-        help="model parameter, tweak to increase accuracy",
-    )
-    parser.add_argument(
-        "--condition_on_previous_text",
-        type=str2bool,
-        default=True,
-        help="model parameter, tweak to increase accuracy",
-    )
+    # parser.add_argument(
+    #     "--compute_type",
+    #     type=str,
+    #     default="default",
+    #     choices=[
+    #         "int8",
+    #         "int8_float32",
+    #         "int8_float16",
+    #         "int8_bfloat16",
+    #         "int16",
+    #         "float16",
+    #         "bfloat16",
+    #         "float32",
+    #     ],
+    #     help="Type to use for computation. \
+    #         See https://opennmt.net/CTranslate2/quantization.html.",
+    # )
+    # parser.add_argument(
+    #     "--beam_size",
+    #     type=int,
+    #     default=5,
+    #     help="model parameter, tweak to increase accuracy",
+    # )
+    # parser.add_argument(
+    #     "--no_speech_threshold",
+    #     type=float,
+    #     default=0.6,
+    #     help="model parameter, tweak to increase accuracy",
+    # )
+    # parser.add_argument(
+    #     "--condition_on_previous_text",
+    #     type=str2bool,
+    #     default=True,
+    #     help="model parameter, tweak to increase accuracy",
+    # )
     parser.add_argument(
-        "--task",
+        "--show",
         type=str,
-        default="transcribe",
-        choices=["transcribe", "translate"],
+        default=None,
+        #choices=["transcribe", "translate"],
         help="whether to perform X->X speech recognition ('transcribe') \
            or X->English translation ('translate')",
     )
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index d5202fa..2fe1ba3 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -24,8 +24,9 @@ def process(args: dict):
 
     model_name: str = args.pop("model")
     language: str = args.pop("language")
-    sample_interval: str = args.pop("sample_interval")
-    audio_channel: str = args.pop("audio_channel")
+    show: str = args.pop("show")
+    # sample_interval: str = args.pop("sample_interval")
+    # audio_channel: str = args.pop("audio_channel")
 
     if model_name.endswith(".en"):
         warnings.warn(
@@ -37,18 +38,18 @@ def process(args: dict):
         args["language"] = language
 
     model_args = {}
-    model_args["model_size_or_path"] = model_name
+    # model_args["model_size_or_path"] = model_name
     model_args["device"] = args.pop("device")
-    model_args["compute_type"] = args.pop("compute_type")
+    # model_args["compute_type"] = args.pop("compute_type")
 
-    list_of_episodes_needing_subtitles = get_wanted_episodes()
+    list_of_episodes_needing_subtitles = get_wanted_episodes(show)
     print(
         f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
     )
     for episode in list_of_episodes_needing_subtitles["data"]:
         print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
         episode_data = get_episode_details(episode["sonarrEpisodeId"])
-        audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
+        audios = get_audio([episode_data["path"]], 0, None)
         subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
 
         add_subtitles_to_mp4(subtitles)
diff --git a/bazarr-ai-sub-generator/utils/bazarr.py b/bazarr-ai-sub-generator/utils/bazarr.py
index dc110a7..722f53d 100644
--- a/bazarr-ai-sub-generator/utils/bazarr.py
+++ b/bazarr-ai-sub-generator/utils/bazarr.py
@@ -8,15 +8,19 @@ token = config._sections["bazarr"]["token"]
 base_url = config._sections["bazarr"]["url"]
 
 
-def get_wanted_episodes():
+def get_wanted_episodes(show: str=None):
     url = f"{base_url}/api/episodes/wanted"
 
     payload = {}
     headers = {"accept": "application/json", "X-API-KEY": token}
 
     response = requests.request("GET", url, headers=headers, data=payload)
-
-    return response.json()
+
+    data = response.json()
+    if show != None:
+        data['data'] = [item for item in data['data'] if item['seriesTitle'] == show]
+        data['total'] = len(data['data'])
+    return data
 
 
 def get_episode_details(episode_id: str):

From 281b3cabc15770258793aa41a444bcac0c97a766 Mon Sep 17 00:00:00 2001
From: Karl
Date: Sat, 13 Jul 2024 09:56:21 +0000
Subject: [PATCH 3/4] more cleanup

---
 .vscode/launch.json                        |  7 +++++--
 bazarr-ai-sub-generator/cli.py             |  1 -
 bazarr-ai-sub-generator/main.py            | 21 +++------------------
 bazarr-ai-sub-generator/utils/decorator.py | 13 +++++++++++++
 requirements.txt                           |  1 -
 setup.py                                   |  1 -
 6 files changed, 21 insertions(+), 23 deletions(-)
 create mode 100644 bazarr-ai-sub-generator/utils/decorator.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 6cacf66..ccbc084 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -11,12 +11,15 @@
             "program": "${file}",
             "console": "integratedTerminal",
             "justMyCode": false,
+            "env": {
+                "CUDA_VISIBLE_DEVICES": "1"
+            },
             "args": [
                 "--model",
                 "base",
                 "--show",
                 "Gary Neville's Soccerbox"
-            ],
+            ]
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py
index 8780f82..ad22bf6 100644
--- a/bazarr-ai-sub-generator/cli.py
+++ b/bazarr-ai-sub-generator/cli.py
@@ -77,7 +77,6 @@ def main():
         "--show",
         type=str,
         default=None,
-        #choices=["transcribe", "translate"],
         help="whether to perform X->X speech recognition ('transcribe') \
            or X->English translation ('translate')",
     )
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index 2fe1ba3..76d009b 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -7,27 +7,14 @@ from utils.ffmpeg import get_audio, add_subtitles_to_mp4
 from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
 from utils.whisper import WhisperAI
-
-def measure_time(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        duration = end_time - start_time
-        print(f"Function '{func.__name__}' executed in: {duration:.6f} seconds")
-        return result
-    return wrapper
-
-
+from utils.decorator import measure_time
 
 def process(args: dict):
 
     model_name: str = args.pop("model")
     language: str = args.pop("language")
     show: str = args.pop("show")
-    # sample_interval: str = args.pop("sample_interval")
-    # audio_channel: str = args.pop("audio_channel")
-
+
     if model_name.endswith(".en"):
         warnings.warn(
             f"{model_name} is an English-only model, forcing English detection."
@@ -38,10 +25,8 @@ def process(args: dict):
         args["language"] = language
 
     model_args = {}
-    # model_args["model_size_or_path"] = model_name
     model_args["device"] = args.pop("device")
-    # model_args["compute_type"] = args.pop("compute_type")
-
+
     list_of_episodes_needing_subtitles = get_wanted_episodes(show)
     print(
         f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
diff --git a/bazarr-ai-sub-generator/utils/decorator.py b/bazarr-ai-sub-generator/utils/decorator.py
new file mode 100644
index 0000000..d06a988
--- /dev/null
+++ b/bazarr-ai-sub-generator/utils/decorator.py
@@ -0,0 +1,13 @@
+import time
+from datetime import timedelta
+
+def measure_time(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        duration = end_time - start_time
+        human_readable_duration = str(timedelta(seconds=duration))
+        print(f"Function '{func.__name__}' executed in: {human_readable_duration}")
+        return result
+    return wrapper
diff --git a/requirements.txt b/requirements.txt
index ec34ef1..755a1a7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-faster-whisper==0.10.0
 tqdm==4.56.0
 ffmpeg-python==0.2.0
 git+https://github.com/openai/whisper.git
diff --git a/setup.py b/setup.py
index 337aa84..96873f9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,6 @@ setup(
     py_modules=["bazarr-ai-sub-generator"],
     author="Karl Hudgell",
     install_requires=[
-        'faster-whisper',
         'tqdm',
         'ffmpeg-python'
     ],

From 966fa639c48d4011e2e056172be395af220dbfe8 Mon Sep 17 00:00:00 2001
From: Karl
Date: Tue, 16 Jul 2024 07:31:28 +0000
Subject: [PATCH 4/4] more updates, working with whisper and faster-whisper

---
 .vscode/launch.json                           | 22 ++++--
 bazarr-ai-sub-generator/cli.py                | 18 -----
 bazarr-ai-sub-generator/main.py               | 16 +++--
 .../utils/faster_whisper.py                   | 68 +++++++++++++++++++
 bazarr-ai-sub-generator/utils/whisper.py      |  2 +-
 requirements.txt                              |  6 ++
 6 files changed, 102 insertions(+), 30 deletions(-)
 create mode 100644 bazarr-ai-sub-generator/utils/faster_whisper.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
index ccbc084..f9576a1 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -6,19 +6,31 @@
     "configurations": [
         {
             "name": "Python Debugger: Current File",
-            "type": "debugpy",
+            "type": "python",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
             "justMyCode": false,
             "env": {
-                "CUDA_VISIBLE_DEVICES": "1"
+                "CUDA_VISIBLE_DEVICES": "1",
+                "LD_LIBRARY_PATH": "/home/karl/faster-auto-subtitle/venv/lib/python3.11/site-packages/nvidia/cublas/lib:/home/karl/faster-auto-subtitle/venv/lib/python3.11/site-packages/nvidia/cudnn/lib"
             },
             "args": [
                 "--model",
-                "base",
-                "--show",
-                "Gary Neville's Soccerbox"
+                "base"
+            ]
+        },
+        {
+            "name": "Current (withenv)",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${workspaceFolder}/run_with_env.sh",
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "args": [
+                "${file}",
+                "--model",
+                "base"
             ]
         }
     ]
diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py
index ad22bf6..f788796 100644
--- a/bazarr-ai-sub-generator/cli.py
+++ b/bazarr-ai-sub-generator/cli.py
@@ -55,24 +55,6 @@ def main():
     #     help="Type to use for computation. \
     #         See https://opennmt.net/CTranslate2/quantization.html.",
     # )
-    # parser.add_argument(
-    #     "--beam_size",
-    #     type=int,
-    #     default=5,
-    #     help="model parameter, tweak to increase accuracy",
-    # )
-    # parser.add_argument(
-    #     "--no_speech_threshold",
-    #     type=float,
-    #     default=0.6,
-    #     help="model parameter, tweak to increase accuracy",
-    # )
-    # parser.add_argument(
-    #     "--condition_on_previous_text",
-    #     type=str2bool,
-    #     default=True,
-    #     help="model parameter, tweak to increase accuracy",
-    # )
     parser.add_argument(
         "--show",
         type=str,
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index 76d009b..c38fa16 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -6,6 +6,7 @@ from utils.files import filename, write_srt
 from utils.ffmpeg import get_audio, add_subtitles_to_mp4
 from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
+# from utils.faster_whisper import WhisperAI
 from utils.whisper import WhisperAI
 from utils.decorator import measure_time
 
@@ -34,13 +35,16 @@ def process(args: dict):
     for episode in list_of_episodes_needing_subtitles["data"]:
         print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
         episode_data = get_episode_details(episode["sonarrEpisodeId"])
-        audios = get_audio([episode_data["path"]], 0, None)
-        subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
+        try:
+            audios = get_audio([episode_data["path"]], 0, None)
+            subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
 
-        add_subtitles_to_mp4(subtitles)
-        update_show_in_sonarr(episode["sonarrSeriesId"])
-        time.sleep(5)
-        sync_series()
+            add_subtitles_to_mp4(subtitles)
+            update_show_in_sonarr(episode["sonarrSeriesId"])
+            time.sleep(5)
+            sync_series()
+        except Exception as ex:
+            print(f"skipping file due to - {ex}")
 
 @measure_time
 def get_subtitles(
diff --git a/bazarr-ai-sub-generator/utils/faster_whisper.py b/bazarr-ai-sub-generator/utils/faster_whisper.py
new file mode 100644
index 0000000..a9700a8
--- /dev/null
+++ b/bazarr-ai-sub-generator/utils/faster_whisper.py
@@ -0,0 +1,68 @@
+import warnings
+import faster_whisper
+from tqdm import tqdm
+
+
+# pylint: disable=R0903
+class WhisperAI:
+    """
+    Wrapper class for the Whisper speech recognition model with additional functionality.
+
+    This class provides a high-level interface for transcribing audio files using the Whisper
+    speech recognition model. It encapsulates the model instantiation and transcription process,
+    allowing users to easily transcribe audio files and iterate over the resulting segments.
+
+    Usage:
+    ```python
+    whisper = WhisperAI(model_args, transcribe_args)
+
+    # Transcribe an audio file and iterate over the segments
+    for segment in whisper.transcribe(audio_path):
+        # Process each transcription segment
+        print(segment)
+    ```
+
+    Args:
+    - model_args: Arguments to pass to WhisperModel initialize method
+        - model_size_or_path (str): The name of the Whisper model to use.
+        - device (str): The device to use for computation ("cpu", "cuda", "auto").
+        - compute_type (str): The type to use for computation.
+            See https://opennmt.net/CTranslate2/quantization.html.
+    - transcribe_args (dict): Additional arguments to pass to the transcribe method.
+
+    Attributes:
+    - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
+    - transcribe_args (dict): Additional arguments used for transcribe method.
+
+    Methods:
+    - transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
+    """
+
+    def __init__(self, model_args: dict, transcribe_args: dict):
+        # self.model = faster_whisper.WhisperModel(**model_args)
+        model_size = "base"
+        self.model = faster_whisper.WhisperModel(model_size, device="cuda")
+        self.transcribe_args = transcribe_args
+
+    def transcribe(self, audio_path: str):
+        """
+        Transcribes the specified audio file and yields the resulting segments.
+
+        Args:
+        - audio_path (str): The path to the audio file for transcription.
+
+        Yields:
+        - faster_whisper.TranscriptionSegment: An individual transcription segment.
+        """
+        warnings.filterwarnings("ignore")
+        segments, info = self.model.transcribe(audio_path, beam_size=5)
+        warnings.filterwarnings("default")
+
+        # Same precision as the Whisper timestamps.
+        total_duration = round(info.duration, 2)
+
+        with tqdm(total=total_duration, unit=" seconds") as pbar:
+            for segment in segments:
+                yield segment
+                pbar.update(segment.end - segment.start)
+            pbar.update(0)
\ No newline at end of file
diff --git a/bazarr-ai-sub-generator/utils/whisper.py b/bazarr-ai-sub-generator/utils/whisper.py
index 6db019c..3f2fc9f 100644
--- a/bazarr-ai-sub-generator/utils/whisper.py
+++ b/bazarr-ai-sub-generator/utils/whisper.py
@@ -50,7 +50,7 @@ class WhisperAI:
         # Set device for computation
         self.device = torch.device(device)
         # Load the Whisper model with the specified size
-        self.model = whisper.load_model("base").to(self.device)
+        self.model = whisper.load_model("base.en").to(self.device)
         # Store the additional transcription arguments
         self.transcribe_args = transcribe_args
 
diff --git a/requirements.txt b/requirements.txt
index 755a1a7..9582f9b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,9 @@
 tqdm==4.56.0
 ffmpeg-python==0.2.0
 git+https://github.com/openai/whisper.git
+faster-whisper
+nvidia-cublas-cu12
+nvidia-cudnn-cu12
+nvidia-cublas-cu11
+nvidia-cudnn-cu11
+ctranslate2==3.24.0
\ No newline at end of file
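
---
Note on the two interchangeable backends: utils/whisper.py yields plain dict
segments (segment["start"], segment["end"], segment["text"]), while
utils/faster_whisper.py yields faster-whisper's attribute-style Segment
objects (segment.start, segment.end, segment.text), and write_srt in
utils/files.py was rewritten in patch 1 to expect only the dict shape. A
minimal adapter sketch (hypothetical; as_dict_segments is not part of this
series) that would let the faster-whisper backend feed the same write_srt:

    from typing import Iterator

    def as_dict_segments(segments) -> Iterator[dict]:
        # Map attribute-style segments (seg.start / seg.end / seg.text) to the
        # dict shape ({"start", "end", "text"}) that write_srt indexes into.
        for seg in segments:
            yield {"start": seg.start, "end": seg.end, "text": seg.text}

    # Usage sketch: write_srt(as_dict_segments(model.transcribe(audio_path)), file=srt_file)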