From fde1b4d89ef404fd6bd0b326e6d861e9ab174303 Mon Sep 17 00:00:00 2001
From: Karl
Date: Sat, 13 Jul 2024 09:22:02 +0000
Subject: [PATCH 1/4] add cuda deps

---
 .vscode/launch.json                      |  4 +-
 bazarr-ai-sub-generator/main.py          | 14 ++++++-
 bazarr-ai-sub-generator/utils/files.py   |  6 +--
 bazarr-ai-sub-generator/utils/whisper.py | 51 ++++++++++++++++--------
 requirements.txt                         |  3 +-
 5 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 94c9cc5..a5131f3 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -5,8 +5,8 @@
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Python: Current File",
-            "type": "python",
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index 64074a8..d5202fa 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -8,8 +8,20 @@ from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
 from utils.whisper import WhisperAI
 
+def measure_time(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        duration = end_time - start_time
+        print(f"Function '{func.__name__}' executed in: {duration:.6f} seconds")
+        return result
+    return wrapper
+
+
 def process(args: dict):
+
     model_name: str = args.pop("model")
     language: str = args.pop("language")
     sample_interval: str = args.pop("sample_interval")
     audio_channel: str = args.pop("audio_channel")
@@ -44,7 +56,7 @@ def process(args: dict):
             time.sleep(5)
         sync_series()
 
-
+@measure_time
 def get_subtitles(
     audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
 ):
diff --git a/bazarr-ai-sub-generator/utils/files.py b/bazarr-ai-sub-generator/utils/files.py
index ea40253..29faa08 100644
--- a/bazarr-ai-sub-generator/utils/files.py
+++ b/bazarr-ai-sub-generator/utils/files.py
@@ -7,9 +7,9 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
     for i, segment in enumerate(transcript, start=1):
         print(
             f"{i}\n"
-            f"{format_timestamp(segment.start, always_include_hours=True)} --> "
-            f"{format_timestamp(segment.end, always_include_hours=True)}\n"
-            f"{segment.text.strip().replace('-->', '->')}\n",
+            f"{format_timestamp(segment['start'], always_include_hours=True)} --> "
+            f"{format_timestamp(segment['end'], always_include_hours=True)}\n"
+            f"{segment['text'].strip().replace('-->', '->')}\n",
             file=file,
             flush=True,
         )
diff --git a/bazarr-ai-sub-generator/utils/whisper.py b/bazarr-ai-sub-generator/utils/whisper.py
index 5e823b1..6db019c 100644
--- a/bazarr-ai-sub-generator/utils/whisper.py
+++ b/bazarr-ai-sub-generator/utils/whisper.py
@@ -1,9 +1,9 @@
 import warnings
-import faster_whisper
+import torch
+import whisper
 from tqdm import tqdm
 
 
-# pylint: disable=R0903
 class WhisperAI:
     """
     Wrapper class for the Whisper speech recognition model with additional functionality.
@@ -23,23 +23,35 @@ class WhisperAI:
     ```
 
     Args:
-    - model_args: Arguments to pass to WhisperModel initialize method
-        - model_size_or_path (str): The name of the Whisper model to use.
-        - device (str): The device to use for computation ("cpu", "cuda", "auto").
-        - compute_type (str): The type to use for computation.
-            See https://opennmt.net/CTranslate2/quantization.html.
+    - model_args (dict): Arguments to pass to Whisper model initialization
+        - model_size (str): The name of the Whisper model to use.
+        - device (str): The device to use for computation ("cpu" or "cuda").
     - transcribe_args (dict): Additional arguments to pass to the transcribe method.
 
     Attributes:
-    - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
+    - model (whisper.Whisper): The underlying Whisper speech recognition model.
+    - device (torch.device): The device to use for computation.
     - transcribe_args (dict): Additional arguments used for transcribe method.
 
     Methods:
-    - transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
+    - transcribe(audio_path: str): Transcribes an audio file and yields the resulting segments.
     """
 
     def __init__(self, model_args: dict, transcribe_args: dict):
-        self.model = faster_whisper.WhisperModel(**model_args)
+        """
+        Initializes the WhisperAI instance.
+
+        Args:
+        - model_args (dict): Arguments to initialize the Whisper model.
+        - transcribe_args (dict): Additional arguments for the transcribe method.
+        """
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(device)
+        # Set device for computation
+        self.device = torch.device(device)
+        # Load the Whisper model with the specified size
+        self.model = whisper.load_model("base").to(self.device)
+        # Store the additional transcription arguments
         self.transcribe_args = transcribe_args
 
     def transcribe(self, audio_path: str):
@@ -50,17 +62,24 @@ class WhisperAI:
         - audio_path (str): The path to the audio file for transcription.
 
         Yields:
-        - faster_whisper.TranscriptionSegment: An individual transcription segment.
+        - dict: An individual transcription segment.
         """
+        # Suppress warnings during transcription
         warnings.filterwarnings("ignore")
-        segments, info = self.model.transcribe(audio_path, **self.transcribe_args)
+        # Load and transcribe the audio file
+        result = self.model.transcribe(audio_path, **self.transcribe_args)
+        # Restore default warning behavior
        warnings.filterwarnings("default")
 
-        # Same precision as the Whisper timestamps.
-        total_duration = round(info.duration, 2)
+        # Calculate the total duration from the segments
+        total_duration = max(segment["end"] for segment in result["segments"])
 
+        # Create a progress bar with the total duration of the audio file
         with tqdm(total=total_duration, unit=" seconds") as pbar:
-            for segment in segments:
+            for segment in result["segments"]:
+                # Yield each transcription segment
                 yield segment
-                pbar.update(segment.end - segment.start)
+                # Update the progress bar with the duration of the current segment
+                pbar.update(segment["end"] - segment["start"])
+            # Ensure the progress bar reaches 100% upon completion
             pbar.update(0)
diff --git a/requirements.txt b/requirements.txt
index eab95da..ec34ef1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 faster-whisper==0.10.0
 tqdm==4.56.0
-ffmpeg-python==0.2.0
\ No newline at end of file
+ffmpeg-python==0.2.0
+git+https://github.com/openai/whisper.git

From 7e83e4ef1e9cf590c454a66107c11baa8c20eb4a Mon Sep 17 00:00:00 2001
From: Karl
Date: Sat, 13 Jul 2024 09:35:19 +0000
Subject: [PATCH 2/4] cleanup and add ability to process specific show only

---
 .vscode/launch.json                     |  2 +
 bazarr-ai-sub-generator/cli.py          | 96 ++++++++++++-------------
 bazarr-ai-sub-generator/main.py         | 13 ++--
 bazarr-ai-sub-generator/utils/bazarr.py | 10 ++-
 4 files changed, 64 insertions(+), 57 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index a5131f3..6cacf66 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -14,6 +14,8 @@
             "args": [
                 "--model",
                 "base",
+                "--show",
+                "Gary Neville's Soccerbox"
             ],
         }
     ]
diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py
index d1436b3..8780f82 100644
--- a/bazarr-ai-sub-generator/cli.py
+++ b/bazarr-ai-sub-generator/cli.py
@@ -15,16 +15,16 @@ def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
-    parser.add_argument(
-        "--audio_channel", default="0", type=int, help="audio channel index to use"
-    )
-    parser.add_argument(
-        "--sample_interval",
-        type=str2timeinterval,
-        default=None,
-        help="generate subtitles for a specific \
-                        fragment of the video (e.g. 01:02:05-01:03:45)",
-    )
+    # parser.add_argument(
+    #     "--audio_channel", default="0", type=int, help="audio channel index to use"
+    # )
+    # parser.add_argument(
+    #     "--sample_interval",
+    #     type=str2timeinterval,
+    #     default=None,
+    #     help="generate subtitles for a specific \
+    #                     fragment of the video (e.g. 01:02:05-01:03:45)",
+    # )
     parser.add_argument(
         "--model",
         default="small",
@@ -38,46 +38,46 @@ def main():
         choices=["cpu", "cuda", "auto"],
         help='Device to use for computation ("cpu", "cuda", "auto")',
     )
-    parser.add_argument(
-        "--compute_type",
-        type=str,
-        default="default",
-        choices=[
-            "int8",
-            "int8_float32",
-            "int8_float16",
-            "int8_bfloat16",
-            "int16",
-            "float16",
-            "bfloat16",
-            "float32",
-        ],
-        help="Type to use for computation. \
-            See https://opennmt.net/CTranslate2/quantization.html.",
-    )
-    parser.add_argument(
-        "--beam_size",
-        type=int,
-        default=5,
-        help="model parameter, tweak to increase accuracy",
-    )
-    parser.add_argument(
-        "--no_speech_threshold",
-        type=float,
-        default=0.6,
-        help="model parameter, tweak to increase accuracy",
-    )
-    parser.add_argument(
-        "--condition_on_previous_text",
-        type=str2bool,
-        default=True,
-        help="model parameter, tweak to increase accuracy",
-    )
+    # parser.add_argument(
+    #     "--compute_type",
+    #     type=str,
+    #     default="default",
+    #     choices=[
+    #         "int8",
+    #         "int8_float32",
+    #         "int8_float16",
+    #         "int8_bfloat16",
+    #         "int16",
+    #         "float16",
+    #         "bfloat16",
+    #         "float32",
+    #     ],
+    #     help="Type to use for computation. \
+    #         See https://opennmt.net/CTranslate2/quantization.html.",
+    # )
+    # parser.add_argument(
+    #     "--beam_size",
+    #     type=int,
+    #     default=5,
+    #     help="model parameter, tweak to increase accuracy",
+    # )
+    # parser.add_argument(
+    #     "--no_speech_threshold",
+    #     type=float,
+    #     default=0.6,
+    #     help="model parameter, tweak to increase accuracy",
+    # )
+    # parser.add_argument(
+    #     "--condition_on_previous_text",
+    #     type=str2bool,
+    #     default=True,
+    #     help="model parameter, tweak to increase accuracy",
+    # )
     parser.add_argument(
-        "--task",
+        "--show",
         type=str,
-        default="transcribe",
-        choices=["transcribe", "translate"],
+        default=None,
+        #choices=["transcribe", "translate"],
         help="whether to perform X->X speech recognition ('transcribe') \
            or X->English translation ('translate')",
     )
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index d5202fa..2fe1ba3 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -24,8 +24,9 @@ def process(args: dict):
 
     model_name: str = args.pop("model")
     language: str = args.pop("language")
-    sample_interval: str = args.pop("sample_interval")
-    audio_channel: str = args.pop("audio_channel")
+    show: str = args.pop("show")
+    # sample_interval: str = args.pop("sample_interval")
+    # audio_channel: str = args.pop("audio_channel")
 
     if model_name.endswith(".en"):
         warnings.warn(
@@ -37,18 +38,18 @@ def process(args: dict):
         args["language"] = language
 
     model_args = {}
-    model_args["model_size_or_path"] = model_name
+    # model_args["model_size_or_path"] = model_name
     model_args["device"] = args.pop("device")
-    model_args["compute_type"] = args.pop("compute_type")
+    # model_args["compute_type"] = args.pop("compute_type")
 
-    list_of_episodes_needing_subtitles = get_wanted_episodes()
+    list_of_episodes_needing_subtitles = get_wanted_episodes(show)
     print(
         f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
     )
     for episode in list_of_episodes_needing_subtitles["data"]:
         print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
         episode_data = get_episode_details(episode["sonarrEpisodeId"])
-        audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
+        audios = get_audio([episode_data["path"]], 0, None)
         subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
 
         add_subtitles_to_mp4(subtitles)
diff --git a/bazarr-ai-sub-generator/utils/bazarr.py b/bazarr-ai-sub-generator/utils/bazarr.py
index dc110a7..722f53d 100644
--- a/bazarr-ai-sub-generator/utils/bazarr.py
+++ b/bazarr-ai-sub-generator/utils/bazarr.py
@@ -8,15 +8,19 @@ token = config._sections["bazarr"]["token"]
 base_url = config._sections["bazarr"]["url"]
 
 
-def get_wanted_episodes():
+def get_wanted_episodes(show: str=None):
     url = f"{base_url}/api/episodes/wanted"
 
     payload = {}
     headers = {"accept": "application/json", "X-API-KEY": token}
 
     response = requests.request("GET", url, headers=headers, data=payload)
-
-    return response.json()
+
+    data = response.json()
+    if show != None:
+        data['data'] = [item for item in data['data'] if item['seriesTitle'] == show]
+        data['total'] = len(data['data'])
+    return data
 
 
 def get_episode_details(episode_id: str):

From 281b3cabc15770258793aa41a444bcac0c97a766 Mon Sep 17 00:00:00 2001
From: Karl
Date: Sat, 13 Jul 2024 09:56:21 +0000
Subject: [PATCH 3/4] more cleanup

---
 .vscode/launch.json                        |  7 +++++--
 bazarr-ai-sub-generator/cli.py             |  1 -
 bazarr-ai-sub-generator/main.py            | 21 +++------------------
 bazarr-ai-sub-generator/utils/decorator.py | 13 +++++++++++++
 requirements.txt                           |  1 -
 setup.py                                   |  1 -
 6 files changed, 21 insertions(+), 23 deletions(-)
 create mode 100644 bazarr-ai-sub-generator/utils/decorator.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 6cacf66..ccbc084 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -11,12 +11,15 @@
             "program": "${file}",
             "console": "integratedTerminal",
             "justMyCode": false,
+            "env": {
+                "CUDA_VISIBLE_DEVICES": "1"
+            },
             "args": [
                 "--model",
                 "base",
                 "--show",
                 "Gary Neville's Soccerbox"
-            ],
+            ]
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py
index 8780f82..ad22bf6 100644
--- a/bazarr-ai-sub-generator/cli.py
+++ b/bazarr-ai-sub-generator/cli.py
@@ -77,7 +77,6 @@ def main():
         "--show",
         type=str,
         default=None,
-        #choices=["transcribe", "translate"],
         help="whether to perform X->X speech recognition ('transcribe') \
            or X->English translation ('translate')",
     )
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index 2fe1ba3..76d009b 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -7,27 +7,14 @@ from utils.ffmpeg import get_audio, add_subtitles_to_mp4
 from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
 from utils.whisper import WhisperAI
-
-def measure_time(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        duration = end_time - start_time
-        print(f"Function '{func.__name__}' executed in: {duration:.6f} seconds")
-        return result
-    return wrapper
-
-
+from utils.decorator import measure_time
 
 def process(args: dict):
 
     model_name: str = args.pop("model")
     language: str = args.pop("language")
     show: str = args.pop("show")
-    # sample_interval: str = args.pop("sample_interval")
-    # audio_channel: str = args.pop("audio_channel")
-
+
     if model_name.endswith(".en"):
         warnings.warn(
             f"{model_name} is an English-only model, forcing English detection."
@@ -38,10 +25,8 @@ def process(args: dict):
         args["language"] = language
 
     model_args = {}
-    # model_args["model_size_or_path"] = model_name
     model_args["device"] = args.pop("device")
-    # model_args["compute_type"] = args.pop("compute_type")
-
+
     list_of_episodes_needing_subtitles = get_wanted_episodes(show)
     print(
         f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
diff --git a/bazarr-ai-sub-generator/utils/decorator.py b/bazarr-ai-sub-generator/utils/decorator.py
new file mode 100644
index 0000000..d06a988
--- /dev/null
+++ b/bazarr-ai-sub-generator/utils/decorator.py
@@ -0,0 +1,13 @@
+import time
+from datetime import timedelta
+
+def measure_time(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        duration = end_time - start_time
+        human_readable_duration = str(timedelta(seconds=duration))
+        print(f"Function '{func.__name__}' executed in: {human_readable_duration}")
+        return result
+    return wrapper
diff --git a/requirements.txt b/requirements.txt
index ec34ef1..755a1a7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-faster-whisper==0.10.0
 tqdm==4.56.0
 ffmpeg-python==0.2.0
 git+https://github.com/openai/whisper.git
diff --git a/setup.py b/setup.py
index 337aa84..96873f9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,6 @@ setup(
     py_modules=["bazarr-ai-sub-generator"],
     author="Karl Hudgell",
     install_requires=[
-        'faster-whisper',
         'tqdm',
         'ffmpeg-python'
     ],

From 966fa639c48d4011e2e056172be395af220dbfe8 Mon Sep 17 00:00:00 2001
From: Karl
Date: Tue, 16 Jul 2024 07:31:28 +0000
Subject: [PATCH 4/4] more updates, working with whisper and faster-whisper

---
 .vscode/launch.json                           | 22 ++++--
 bazarr-ai-sub-generator/cli.py                | 18 -----
 bazarr-ai-sub-generator/main.py               | 16 +++--
 .../utils/faster_whisper.py                   | 68 +++++++++++++++++++
 bazarr-ai-sub-generator/utils/whisper.py      |  2 +-
 requirements.txt                              |  6 ++
 6 files changed, 102 insertions(+), 30 deletions(-)
 create mode 100644 bazarr-ai-sub-generator/utils/faster_whisper.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
index ccbc084..f9576a1 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -6,19 +6,31 @@
     "configurations": [
         {
             "name": "Python Debugger: Current File",
-            "type": "debugpy",
+            "type": "python",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
             "justMyCode": false,
             "env": {
-                "CUDA_VISIBLE_DEVICES": "1"
+                "CUDA_VISIBLE_DEVICES": "1",
+                "LD_LIBRARY_PATH": "/home/karl/faster-auto-subtitle/venv/lib/python3.11/site-packages/nvidia/cublas/lib:/home/karl/faster-auto-subtitle/venv/lib/python3.11/site-packages/nvidia/cudnn/lib"
             },
             "args": [
                 "--model",
-                "base",
-                "--show",
-                "Gary Neville's Soccerbox"
+                "base"
+            ]
+        },
+        {
+            "name": "Current (withenv)",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${workspaceFolder}/run_with_env.sh",
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "args": [
+                "${file}",
+                "--model",
+                "base"
             ]
         }
     ]
diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py
index ad22bf6..f788796 100644
--- a/bazarr-ai-sub-generator/cli.py
+++ b/bazarr-ai-sub-generator/cli.py
@@ -55,24 +55,6 @@ def main():
     #     help="Type to use for computation. \
     #         See https://opennmt.net/CTranslate2/quantization.html.",
     # )
-    # parser.add_argument(
-    #     "--beam_size",
-    #     type=int,
-    #     default=5,
-    #     help="model parameter, tweak to increase accuracy",
-    # )
-    # parser.add_argument(
-    #     "--no_speech_threshold",
-    #     type=float,
-    #     default=0.6,
-    #     help="model parameter, tweak to increase accuracy",
-    # )
-    # parser.add_argument(
-    #     "--condition_on_previous_text",
-    #     type=str2bool,
-    #     default=True,
-    #     help="model parameter, tweak to increase accuracy",
-    # )
     parser.add_argument(
         "--show",
         type=str,
diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py
index 76d009b..c38fa16 100644
--- a/bazarr-ai-sub-generator/main.py
+++ b/bazarr-ai-sub-generator/main.py
@@ -6,6 +6,7 @@ from utils.files import filename, write_srt
 from utils.ffmpeg import get_audio, add_subtitles_to_mp4
 from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
+# from utils.faster_whisper import WhisperAI
 from utils.whisper import WhisperAI
 from utils.decorator import measure_time
 
@@ -34,13 +35,16 @@ def process(args: dict):
     for episode in list_of_episodes_needing_subtitles["data"]:
         print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
         episode_data = get_episode_details(episode["sonarrEpisodeId"])
-        audios = get_audio([episode_data["path"]], 0, None)
-        subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
+        try:
+            audios = get_audio([episode_data["path"]], 0, None)
+            subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
 
-        add_subtitles_to_mp4(subtitles)
-        update_show_in_sonarr(episode["sonarrSeriesId"])
-        time.sleep(5)
-        sync_series()
+            add_subtitles_to_mp4(subtitles)
+            update_show_in_sonarr(episode["sonarrSeriesId"])
+            time.sleep(5)
+            sync_series()
+        except Exception as ex:
+            print(f"skipping file due to - {ex}")
 
 @measure_time
 def get_subtitles(
diff --git a/bazarr-ai-sub-generator/utils/faster_whisper.py b/bazarr-ai-sub-generator/utils/faster_whisper.py
new file mode 100644
index 0000000..a9700a8
--- /dev/null
+++ b/bazarr-ai-sub-generator/utils/faster_whisper.py
@@ -0,0 +1,68 @@
+import warnings
+import faster_whisper
+from tqdm import tqdm
+
+
+# pylint: disable=R0903
+class WhisperAI:
+    """
+    Wrapper class for the Whisper speech recognition model with additional functionality.
+
+    This class provides a high-level interface for transcribing audio files using the Whisper
+    speech recognition model. It encapsulates the model instantiation and transcription process,
+    allowing users to easily transcribe audio files and iterate over the resulting segments.
+
+    Usage:
+    ```python
+    whisper = WhisperAI(model_args, transcribe_args)
+
+    # Transcribe an audio file and iterate over the segments
+    for segment in whisper.transcribe(audio_path):
+        # Process each transcription segment
+        print(segment)
+    ```
+
+    Args:
+    - model_args: Arguments to pass to WhisperModel initialize method
+        - model_size_or_path (str): The name of the Whisper model to use.
+        - device (str): The device to use for computation ("cpu", "cuda", "auto").
+        - compute_type (str): The type to use for computation.
+            See https://opennmt.net/CTranslate2/quantization.html.
+    - transcribe_args (dict): Additional arguments to pass to the transcribe method.
+
+    Attributes:
+    - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
+    - transcribe_args (dict): Additional arguments used for transcribe method.
+
+    Methods:
+    - transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
+    """
+
+    def __init__(self, model_args: dict, transcribe_args: dict):
+        # self.model = faster_whisper.WhisperModel(**model_args)
+        model_size = "base"
+        self.model = faster_whisper.WhisperModel(model_size, device="cuda")
+        self.transcribe_args = transcribe_args
+
+    def transcribe(self, audio_path: str):
+        """
+        Transcribes the specified audio file and yields the resulting segments.
+
+        Args:
+        - audio_path (str): The path to the audio file for transcription.
+
+        Yields:
+        - faster_whisper.TranscriptionSegment: An individual transcription segment.
+        """
+        warnings.filterwarnings("ignore")
+        segments, info = self.model.transcribe(audio_path, beam_size=5)
+        warnings.filterwarnings("default")
+
+        # Same precision as the Whisper timestamps.
+        total_duration = round(info.duration, 2)
+
+        with tqdm(total=total_duration, unit=" seconds") as pbar:
+            for segment in segments:
+                yield segment
+                pbar.update(segment.end - segment.start)
+            pbar.update(0)
\ No newline at end of file
diff --git a/bazarr-ai-sub-generator/utils/whisper.py b/bazarr-ai-sub-generator/utils/whisper.py
index 6db019c..3f2fc9f 100644
--- a/bazarr-ai-sub-generator/utils/whisper.py
+++ b/bazarr-ai-sub-generator/utils/whisper.py
@@ -50,7 +50,7 @@ class WhisperAI:
         # Set device for computation
         self.device = torch.device(device)
         # Load the Whisper model with the specified size
-        self.model = whisper.load_model("base").to(self.device)
+        self.model = whisper.load_model("base.en").to(self.device)
         # Store the additional transcription arguments
         self.transcribe_args = transcribe_args
 
diff --git a/requirements.txt b/requirements.txt
index 755a1a7..9582f9b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,9 @@
 tqdm==4.56.0
 ffmpeg-python==0.2.0
 git+https://github.com/openai/whisper.git
+faster-whisper
+nvidia-cublas-cu12
+nvidia-cudnn-cu12
+nvidia-cublas-cu11
+nvidia-cudnn-cu11
+ctranslate2==3.24.0
\ No newline at end of file
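
---
Note on the two interchangeable backends: utils/whisper.py yields plain dict
segments (segment["start"], segment["end"], segment["text"]), while
utils/faster_whisper.py yields faster-whisper's attribute-style Segment
objects (segment.start, segment.end, segment.text), and write_srt in
utils/files.py was rewritten in patch 1 to expect only the dict shape. A
minimal adapter sketch (hypothetical; as_dict_segments is not part of this
series) that would let the faster-whisper backend feed the same write_srt:

    from typing import Iterator

    def as_dict_segments(segments) -> Iterator[dict]:
        # Map attribute-style segments (seg.start / seg.end / seg.text) to the
        # dict shape ({"start", "end", "text"}) that write_srt indexes into.
        for seg in segments:
            yield {"start": seg.start, "end": seg.end, "text": seg.text}

    # Usage sketch: write_srt(as_dict_segments(model.transcribe(audio_path)), file=srt_file)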