diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 1c93443..3a201f9 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,24 +1,24 @@ -name: Pylint - -on: [push] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9"] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - pip install -r requirements.txt - - name: Analysing the code with pylint - run: | - pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py') +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install -r requirements.txt + - name: Analysing the code with pylint + run: | + pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py') diff --git a/.gitignore b/.gitignore index 689e0d9..7e860cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,9 @@ -dist -.DS_Store -*.egg-info -build -__pycache__ -venv/ -test/ -.vscode/launch.json -config.cfg +dist +.DS_Store +*.egg-info +build +__pycache__ +venv/ +test/ +.vscode/launch.json +config.cfg diff --git a/.vscode/launch.json b/.vscode/launch.json index 94c9cc5..5fc0fe3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,20 +1,20 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. 
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python: Current File", - "type": "python", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal", - "justMyCode": false, - "args": [ - "--model", - "base", - ], - } - ] +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false, + "args": [ + "--model", + "base", + ], + } + ] } \ No newline at end of file diff --git a/LICENSE b/LICENSE index 3e586f4..1345904 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,22 @@ -MIT License - -Copyright (c) 2022-2024 Miguel Piedrafita -Copyright (c) 2024 Sergey Chernyaev - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) 2022-2024 Miguel Piedrafita +Copyright (c) 2024 Sergey Chernyaev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 59eeb8e..db40dc0 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,50 @@ -# bazarr-ai-sub-generator - -This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation. - -This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video. 
- -This script will connect to your Bazarr instance to get a list of shows that require subtitles and start processing each video to create, by default Engligh subs, these are then written to the file as Soft subtitles. - -It will then send an update to Sonarr and once that is done update the file in Bazarr and move onto the next file. - -Clunky, and slow, but works. - -## Installation - - -## Usage - - +# bazarr-ai-sub-generator + +This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation. + +This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video. + +This script will connect to your Bazarr instance to get a list of shows that require subtitles and start processing each video to create, by default Engligh subs, these are then written to the file as Soft subtitles. + +It will then send an update to Sonarr and once that is done update the file in Bazarr and move onto the next file. + +Clunky, and slow, but works. + +## Installation + + +## Usage + + diff --git a/bazarr-ai-sub-generator/cli.py b/bazarr-ai-sub-generator/cli.py index d1436b3..4cbec9f 100644 --- a/bazarr-ai-sub-generator/cli.py +++ b/bazarr-ai-sub-generator/cli.py @@ -1,99 +1,106 @@ -import argparse -from faster_whisper import available_models -from utils.constants import LANGUAGE_CODES -from main import process -from utils.convert import str2bool, str2timeinterval - - -def main(): - """ - Main entry point for the script. - - Parses command line arguments, processes the inputs using the specified options, - and performs transcription or translation based on the specified task. 
- """ - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--audio_channel", default="0", type=int, help="audio channel index to use" - ) - parser.add_argument( - "--sample_interval", - type=str2timeinterval, - default=None, - help="generate subtitles for a specific \ - fragment of the video (e.g. 01:02:05-01:03:45)", - ) - parser.add_argument( - "--model", - default="small", - choices=available_models(), - help="name of the Whisper model to use", - ) - parser.add_argument( - "--device", - type=str, - default="auto", - choices=["cpu", "cuda", "auto"], - help='Device to use for computation ("cpu", "cuda", "auto")', - ) - parser.add_argument( - "--compute_type", - type=str, - default="default", - choices=[ - "int8", - "int8_float32", - "int8_float16", - "int8_bfloat16", - "int16", - "float16", - "bfloat16", - "float32", - ], - help="Type to use for computation. \ - See https://opennmt.net/CTranslate2/quantization.html.", - ) - parser.add_argument( - "--beam_size", - type=int, - default=5, - help="model parameter, tweak to increase accuracy", - ) - parser.add_argument( - "--no_speech_threshold", - type=float, - default=0.6, - help="model parameter, tweak to increase accuracy", - ) - parser.add_argument( - "--condition_on_previous_text", - type=str2bool, - default=True, - help="model parameter, tweak to increase accuracy", - ) - parser.add_argument( - "--task", - type=str, - default="transcribe", - choices=["transcribe", "translate"], - help="whether to perform X->X speech recognition ('transcribe') \ - or X->English translation ('translate')", - ) - parser.add_argument( - "--language", - type=str, - default="auto", - choices=LANGUAGE_CODES, - help="What is the origin language of the video? 
\ - If unset, it is detected automatically.", - ) - - args = parser.parse_args().__dict__ - - process(args) - - -if __name__ == "__main__": - main() +import argparse +from faster_whisper import available_models +from utils.constants import LANGUAGE_CODES +from main import process +from utils.convert import str2bool, str2timeinterval + + +def main(): + """ + Main entry point for the script. + + Parses command line arguments, processes the inputs using the specified options, + and performs transcription or translation based on the specified task. + """ + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--audio_channel", default="0", type=int, help="audio channel index to use" + ) + parser.add_argument( + "--sample_interval", + type=str2timeinterval, + default=None, + help="generate subtitles for a specific \ + fragment of the video (e.g. 01:02:05-01:03:45)", + ) + parser.add_argument( + "--model", + default="small", + choices=available_models(), + help="name of the Whisper model to use", + ) + parser.add_argument( + "--device", + type=str, + default="auto", + choices=["cpu", "cuda", "auto"], + help='Device to use for computation ("cpu", "cuda", "auto")', + ) + parser.add_argument( + "--compute_type", + type=str, + default="default", + choices=[ + "int8", + "int8_float32", + "int8_float16", + "int8_bfloat16", + "int16", + "float16", + "bfloat16", + "float32", + ], + help="Type to use for computation. 
\ + See https://opennmt.net/CTranslate2/quantization.html.", + ) + parser.add_argument( + "--beam_size", + type=int, + default=5, + help="model parameter, tweak to increase accuracy", + ) + parser.add_argument( + "--no_speech_threshold", + type=float, + default=0.6, + help="model parameter, tweak to increase accuracy", + ) + parser.add_argument( + "--condition_on_previous_text", + type=str2bool, + default=True, + help="model parameter, tweak to increase accuracy", + ) + parser.add_argument( + "--task", + type=str, + default="transcribe", + choices=["transcribe", "translate"], + help="whether to perform X->X speech recognition ('transcribe') \ + or X->English translation ('translate')", + ) + parser.add_argument( + "--language", + type=str, + default="auto", + choices=LANGUAGE_CODES, + help="What is the origin language of the video? \ + If unset, it is detected automatically.", + ) + parser.add_argument( + "--workers", + type=int, + default=1, + help="Number of concurrent workers for processing episodes. 
\ + Increase for better CUDA utilization with multiple episodes.", + ) + + args = parser.parse_args().__dict__ + + process(args) + + +if __name__ == "__main__": + main() diff --git a/bazarr-ai-sub-generator/main.py b/bazarr-ai-sub-generator/main.py index 64074a8..dccd2fe 100644 --- a/bazarr-ai-sub-generator/main.py +++ b/bazarr-ai-sub-generator/main.py @@ -1,66 +1,122 @@ -import os -import warnings -import tempfile -import time -from utils.files import filename, write_srt -from utils.ffmpeg import get_audio, add_subtitles_to_mp4 -from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series -from utils.sonarr import update_show_in_sonarr -from utils.whisper import WhisperAI - - -def process(args: dict): - model_name: str = args.pop("model") - language: str = args.pop("language") - sample_interval: str = args.pop("sample_interval") - audio_channel: str = args.pop("audio_channel") - - if model_name.endswith(".en"): - warnings.warn( - f"{model_name} is an English-only model, forcing English detection." - ) - args["language"] = "en" - # if translate task used and language argument is set, then use it - elif language != "auto": - args["language"] = language - - model_args = {} - model_args["model_size_or_path"] = model_name - model_args["device"] = args.pop("device") - model_args["compute_type"] = args.pop("compute_type") - - list_of_episodes_needing_subtitles = get_wanted_episodes() - print( - f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles." 
- ) - for episode in list_of_episodes_needing_subtitles["data"]: - print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}") - episode_data = get_episode_details(episode["sonarrEpisodeId"]) - audios = get_audio([episode_data["path"]], audio_channel, sample_interval) - subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args) - - add_subtitles_to_mp4(subtitles) - update_show_in_sonarr(episode["sonarrSeriesId"]) - time.sleep(5) - sync_series() - - -def get_subtitles( - audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict -): - model = WhisperAI(model_args, transcribe_args) - - subtitles_path = {} - - for path, audio_path in audio_paths.items(): - print(f"Generating subtitles for {filename(path)}... This might take a while.") - srt_path = os.path.join(output_dir, f"{filename(path)}.srt") - - segments = model.transcribe(audio_path) - - with open(srt_path, "w", encoding="utf-8") as srt: - write_srt(segments, file=srt) - - subtitles_path[path] = srt_path - - return subtitles_path +import os +import warnings +import tempfile +import time +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed +from utils.files import filename, write_srt +from utils.ffmpeg import get_audio, add_subtitles_to_mp4 +from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series +from utils.sonarr import update_show_in_sonarr +from utils.whisper import WhisperAI + + +def process_episode(episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes): + """Process a single episode for subtitle generation.""" + episode_id = episode["sonarrEpisodeId"] + + try: + # Double-check that this episode is still wanted before processing + current_wanted = get_wanted_episodes() + still_wanted = any(ep["sonarrEpisodeId"] == episode_id for ep in current_wanted["data"]) + + if not still_wanted: + processing_episodes.discard(episode_id) + return f"Skipped (no longer wanted): 
{episode['seriesTitle']} - {episode['episode_number']}" + + print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}") + episode_data = get_episode_details(episode_id) + audios = get_audio([episode_data["path"]], audio_channel, sample_interval) + subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args) + + add_subtitles_to_mp4(subtitles) + update_show_in_sonarr(episode["sonarrSeriesId"]) + time.sleep(5) + sync_series() + + processing_episodes.discard(episode_id) + completed_episodes.append(episode_id) + return f"Completed: {episode['seriesTitle']} - {episode['episode_number']}" + except Exception as e: + processing_episodes.discard(episode_id) + return f"Failed {episode['seriesTitle']} - {episode['episode_number']}: {str(e)}" + + +def process(args: dict): + model_name: str = args.pop("model") + language: str = args.pop("language") + sample_interval: str = args.pop("sample_interval") + audio_channel: str = args.pop("audio_channel") + workers: int = args.pop("workers", 1) + + if model_name.endswith(".en"): + warnings.warn( + f"{model_name} is an English-only model, forcing English detection." + ) + args["language"] = "en" + # if translate task used and language argument is set, then use it + elif language != "auto": + args["language"] = language + + model_args = {} + model_args["model_size_or_path"] = model_name + model_args["device"] = args.pop("device") + model_args["compute_type"] = args.pop("compute_type") + + list_of_episodes_needing_subtitles = get_wanted_episodes() + print( + f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles." 
+ ) + print(f"Processing with {workers} concurrent worker(s)...") + + # Thread-safe tracking of episodes being processed and completed + processing_episodes = set() + completed_episodes_list = [] + total_episodes = len(list_of_episodes_needing_subtitles["data"]) + + # Filter episodes to avoid duplicates and respect concurrent processing limits + episodes_to_process = [] + for episode in list_of_episodes_needing_subtitles["data"]: + episode_id = episode["sonarrEpisodeId"] + if episode_id not in processing_episodes: + processing_episodes.add(episode_id) + episodes_to_process.append(episode) + + print(f"Starting processing of {len(episodes_to_process)} unique episodes...") + + with ThreadPoolExecutor(max_workers=workers) as executor: + # Submit episodes for processing with tracking sets + future_to_episode = { + executor.submit(process_episode, episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes_list): episode + for episode in episodes_to_process + } + + # Collect results as they complete + completed_count = 0 + for future in as_completed(future_to_episode): + completed_count += 1 + result = future.result() + print(f"[{completed_count}/{total_episodes}] {result}") + + print(f"Processing complete. {len(completed_episodes_list)} episodes processed successfully.") + + +def get_subtitles( + audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict +): + model = WhisperAI(model_args, transcribe_args) + + subtitles_path = {} + + for path, audio_path in audio_paths.items(): + print(f"Generating subtitles for {filename(path)}... 
This might take a while.") + srt_path = os.path.join(output_dir, f"{filename(path)}.srt") + + segments = model.transcribe(audio_path) + + with open(srt_path, "w", encoding="utf-8") as srt: + write_srt(segments, file=srt) + + subtitles_path[path] = srt_path + + return subtitles_path diff --git a/bazarr-ai-sub-generator/utils/bazarr.py b/bazarr-ai-sub-generator/utils/bazarr.py index dc110a7..f86517b 100644 --- a/bazarr-ai-sub-generator/utils/bazarr.py +++ b/bazarr-ai-sub-generator/utils/bazarr.py @@ -1,40 +1,40 @@ -import requests -import configparser - -config = configparser.RawConfigParser() -config.read("config.cfg") - -token = config._sections["bazarr"]["token"] -base_url = config._sections["bazarr"]["url"] - - -def get_wanted_episodes(): - url = f"{base_url}/api/episodes/wanted" - - payload = {} - headers = {"accept": "application/json", "X-API-KEY": token} - - response = requests.request("GET", url, headers=headers, data=payload) - - return response.json() - - -def get_episode_details(episode_id: str): - url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}" - - payload = {} - headers = {"accept": "application/json", "X-API-KEY": token} - - response = requests.request("GET", url, headers=headers, data=payload) - return response.json()["data"][0] - - -def sync_series(): - url = f"{base_url}/api/system/tasks?taskid=update_series" - - payload = {} - headers = {"accept": "application/json", "X-API-KEY": token} - - response = requests.request("POST", url, headers=headers, data=payload) - if response.status_code == 204: - print("Updated Bazarr") +import requests +import configparser + +config = configparser.RawConfigParser() +config.read("config.cfg") + +token = config._sections["bazarr"]["token"] +base_url = config._sections["bazarr"]["url"] + + +def get_wanted_episodes(): + url = f"{base_url}/api/episodes/wanted" + + payload = {} + headers = {"accept": "application/json", "X-API-KEY": token} + + response = requests.request("GET", url, headers=headers, 
data=payload) + + return response.json() + + +def get_episode_details(episode_id: str): + url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}" + + payload = {} + headers = {"accept": "application/json", "X-API-KEY": token} + + response = requests.request("GET", url, headers=headers, data=payload) + return response.json()["data"][0] + + +def sync_series(): + url = f"{base_url}/api/system/tasks?taskid=update_series" + + payload = {} + headers = {"accept": "application/json", "X-API-KEY": token} + + response = requests.request("POST", url, headers=headers, data=payload) + if response.status_code == 204: + print("Updated Bazarr") diff --git a/bazarr-ai-sub-generator/utils/constants.py b/bazarr-ai-sub-generator/utils/constants.py index 993556f..97c1851 100644 --- a/bazarr-ai-sub-generator/utils/constants.py +++ b/bazarr-ai-sub-generator/utils/constants.py @@ -1,105 +1,105 @@ -""" -List of available language codes -""" -LANGUAGE_CODES = [ - "af", - "am", - "ar", - "as", - "az", - "ba", - "be", - "bg", - "bn", - "bo", - "br", - "bs", - "ca", - "cs", - "cy", - "da", - "de", - "el", - "en", - "es", - "et", - "eu", - "fa", - "fi", - "fo", - "fr", - "gl", - "gu", - "ha", - "haw", - "he", - "hi", - "hr", - "ht", - "hu", - "hy", - "id", - "is", - "it", - "ja", - "jw", - "ka", - "kk", - "km", - "kn", - "ko", - "la", - "lb", - "ln", - "lo", - "lt", - "lv", - "mg", - "mi", - "mk", - "ml", - "mn", - "mr", - "ms", - "mt", - "my", - "ne", - "nl", - "nn", - "no", - "oc", - "pa", - "pl", - "ps", - "pt", - "ro", - "ru", - "sa", - "sd", - "si", - "sk", - "sl", - "sn", - "so", - "sq", - "sr", - "su", - "sv", - "sw", - "ta", - "te", - "tg", - "th", - "tk", - "tl", - "tr", - "tt", - "uk", - "ur", - "uz", - "vi", - "yi", - "yo", - "zh", - "yue", -] +""" +List of available language codes +""" +LANGUAGE_CODES = [ + "af", + "am", + "ar", + "as", + "az", + "ba", + "be", + "bg", + "bn", + "bo", + "br", + "bs", + "ca", + "cs", + "cy", + "da", + "de", + "el", + "en", + "es", + "et", + 
"eu", + "fa", + "fi", + "fo", + "fr", + "gl", + "gu", + "ha", + "haw", + "he", + "hi", + "hr", + "ht", + "hu", + "hy", + "id", + "is", + "it", + "ja", + "jw", + "ka", + "kk", + "km", + "kn", + "ko", + "la", + "lb", + "ln", + "lo", + "lt", + "lv", + "mg", + "mi", + "mk", + "ml", + "mn", + "mr", + "ms", + "mt", + "my", + "ne", + "nl", + "nn", + "no", + "oc", + "pa", + "pl", + "ps", + "pt", + "ro", + "ru", + "sa", + "sd", + "si", + "sk", + "sl", + "sn", + "so", + "sq", + "sr", + "su", + "sv", + "sw", + "ta", + "te", + "tg", + "th", + "tk", + "tl", + "tr", + "tt", + "uk", + "ur", + "uz", + "vi", + "yi", + "yo", + "zh", + "yue", +] diff --git a/bazarr-ai-sub-generator/utils/convert.py b/bazarr-ai-sub-generator/utils/convert.py index 2df28e2..d03de96 100644 --- a/bazarr-ai-sub-generator/utils/convert.py +++ b/bazarr-ai-sub-generator/utils/convert.py @@ -1,92 +1,92 @@ -from datetime import datetime, timedelta - - -def str2bool(string: str): - string = string.lower() - str2val = {"true": True, "false": False} - - if string in str2val: - return str2val[string] - - raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}") - - -def str2timeinterval(string: str): - if string is None: - return None - - if "-" not in string: - raise ValueError( - f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}" - ) - - intervals = string.split("-") - if len(intervals) != 2: - raise ValueError( - f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}" - ) - - start = try_parse_timestamp(intervals[0]) - end = try_parse_timestamp(intervals[1]) - if start >= end: - raise ValueError( - f"Expected time interval end to be higher than start, got {start} >= {end}" - ) - - return [start, end] - - -def time_to_timestamp(string: str): - split_time = string.split(":") - if ( - len(split_time) == 0 - or len(split_time) > 3 - or not all(x.isdigit() for x in split_time) - ): - raise ValueError(f"Expected HH:mm:ss or HH:mm or ss, got 
{string}") - - if len(split_time) == 1: - return int(split_time[0]) - - if len(split_time) == 2: - return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 - - return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 + int(split_time[2]) - - -def try_parse_timestamp(string: str): - timestamp = parse_timestamp(string, "%H:%M:%S") - if timestamp is not None: - return timestamp - - timestamp = parse_timestamp(string, "%H:%M") - if timestamp is not None: - return timestamp - - return parse_timestamp(string, "%S") - - -def parse_timestamp(string: str, pattern: str): - try: - date = datetime.strptime(string, pattern) - delta = timedelta(hours=date.hour, minutes=date.minute, seconds=date.second) - return int(delta.total_seconds()) - except: # pylint: disable=bare-except - return None - - -def format_timestamp(seconds: float, always_include_hours: bool = False): - assert seconds >= 0, "non-negative timestamp expected" - milliseconds = round(seconds * 1000.0) - - hours = milliseconds // 3_600_000 - milliseconds -= hours * 3_600_000 - - minutes = milliseconds // 60_000 - milliseconds -= minutes * 60_000 - - seconds = milliseconds // 1_000 - milliseconds -= seconds * 1_000 - - hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" - return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" +from datetime import datetime, timedelta + + +def str2bool(string: str): + string = string.lower() + str2val = {"true": True, "false": False} + + if string in str2val: + return str2val[string] + + raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}") + + +def str2timeinterval(string: str): + if string is None: + return None + + if "-" not in string: + raise ValueError( + f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}" + ) + + intervals = string.split("-") + if len(intervals) != 2: + raise ValueError( + f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}" + ) + + 
start = try_parse_timestamp(intervals[0]) + end = try_parse_timestamp(intervals[1]) + if start >= end: + raise ValueError( + f"Expected time interval end to be higher than start, got {start} >= {end}" + ) + + return [start, end] + + +def time_to_timestamp(string: str): + split_time = string.split(":") + if ( + len(split_time) == 0 + or len(split_time) > 3 + or not all(x.isdigit() for x in split_time) + ): + raise ValueError(f"Expected HH:mm:ss or HH:mm or ss, got {string}") + + if len(split_time) == 1: + return int(split_time[0]) + + if len(split_time) == 2: + return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 + + return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 + int(split_time[2]) + + +def try_parse_timestamp(string: str): + timestamp = parse_timestamp(string, "%H:%M:%S") + if timestamp is not None: + return timestamp + + timestamp = parse_timestamp(string, "%H:%M") + if timestamp is not None: + return timestamp + + return parse_timestamp(string, "%S") + + +def parse_timestamp(string: str, pattern: str): + try: + date = datetime.strptime(string, pattern) + delta = timedelta(hours=date.hour, minutes=date.minute, seconds=date.second) + return int(delta.total_seconds()) + except: # pylint: disable=bare-except + return None + + +def format_timestamp(seconds: float, always_include_hours: bool = False): + assert seconds >= 0, "non-negative timestamp expected" + milliseconds = round(seconds * 1000.0) + + hours = milliseconds // 3_600_000 + milliseconds -= hours * 3_600_000 + + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + + hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" + return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" diff --git a/bazarr-ai-sub-generator/utils/ffmpeg.py b/bazarr-ai-sub-generator/utils/ffmpeg.py index 190b8fe..cf98412 100644 --- a/bazarr-ai-sub-generator/utils/ffmpeg.py +++ 
import os
import tempfile

import ffmpeg

from .files import filename


def get_audio(paths: list, audio_channel_index: int, sample_interval: list) -> dict:
    """Extract mono 16 kHz WAV audio from each video in *paths*.

    Args:
        paths: Video file paths to process.
        audio_channel_index: Index of the audio stream to extract
            (mapped as ``0:a:<index>``).
        sample_interval: Optional ``[start, end]`` pair in seconds; when
            given, only that window is extracted. ``None`` extracts the
            whole track.

    Returns:
        Mapping of each input video path to its extracted temp WAV path.
    """
    temp_dir = tempfile.gettempdir()
    audio_paths = {}

    for path in paths:
        print(f"Extracting audio from {filename(path)}...")
        output_path = os.path.join(temp_dir, f"{filename(path)}.wav")

        input_args = {}
        if sample_interval is not None:
            # Seek on the input side so decoding starts at the window.
            input_args["ss"] = str(sample_interval[0])

        output_args = {
            "acodec": "pcm_s16le",  # 16-bit PCM
            "ac": "1",              # mono
            "ar": "16k",            # 16 kHz sample rate
            "map": f"0:a:{audio_channel_index}",
        }
        if sample_interval is not None:
            output_args["t"] = str(sample_interval[1] - sample_interval[0])

        ffmpeg.input(path, **input_args).output(
            output_path, **output_args
        ).run(quiet=True, overwrite_output=True)

        audio_paths[path] = output_path

    return audio_paths


def add_subtitles_to_mp4(subtitles: dict):
    """Mux an SRT subtitle track into its video as an MP4 ``mov_text`` track.

    Args:
        subtitles: Single-entry mapping of video path -> SRT path.

    The video is renamed to ``<name>_edit`` while ffmpeg writes the muxed
    output under the original name (``.mkv`` inputs become ``.mp4``).  On
    success the renamed original and the temporary SRT/WAV files are
    removed; on ffmpeg failure the original file name is restored so the
    video is not left stranded under the temp name (the previous version
    never restored it).
    """
    input_file = next(iter(subtitles))
    subtitle_file = subtitles[input_file]
    working_copy = input_file + "_edit"
    os.rename(input_file, working_copy)

    try:
        video_stream = ffmpeg.input(working_copy)
        subtitle_stream = ffmpeg.input(subtitle_file)

        # Copy all existing streams untouched; add the subtitle stream as
        # an MP4-compatible text track tagged as English.
        output = ffmpeg.output(
            video_stream,
            subtitle_stream,
            input_file.replace(".mkv", ".mp4"),
            c="copy",
            **{"c:s": "mov_text"},
            **{"metadata:s:s:0": "language=eng"},
        )
        ffmpeg.run(output, quiet=True, overwrite_output=True)
    except Exception:
        # Put the original file back under its real name before re-raising.
        os.rename(working_copy, input_file)
        raise

    os.remove(working_copy)
    # Remove the temp files produced earlier in the pipeline.
    os.remove(subtitle_file)
    os.remove(subtitle_file.replace(".srt", ".wav"))
always_include_hours=True)}\n" - f"{segment.text.strip().replace('-->', '->')}\n", - file=file, - flush=True, - ) - - -def filename(path: str): - return os.path.splitext(os.path.basename(path))[0] +import os +from typing import Iterator, TextIO +from .convert import format_timestamp + + +def write_srt(transcript: Iterator[dict], file: TextIO): + for i, segment in enumerate(transcript, start=1): + print( + f"{i}\n" + f"{format_timestamp(segment.start, always_include_hours=True)} --> " + f"{format_timestamp(segment.end, always_include_hours=True)}\n" + f"{segment.text.strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + + +def filename(path: str): + return os.path.splitext(os.path.basename(path))[0] diff --git a/bazarr-ai-sub-generator/utils/sonarr.py b/bazarr-ai-sub-generator/utils/sonarr.py index 9f9f277..26a7010 100644 --- a/bazarr-ai-sub-generator/utils/sonarr.py +++ b/bazarr-ai-sub-generator/utils/sonarr.py @@ -1,24 +1,24 @@ -import requests -import json -import configparser - -config = configparser.RawConfigParser() -config.read("config.cfg") - -token = config._sections["sonarr"]["token"] -base_url = config._sections["sonarr"]["url"] - - -def update_show_in_sonarr(show_id): - url = f"{base_url}/api/v3/command" - - payload = json.dumps({"name": "RefreshSeries", "seriesId": show_id}) - headers = { - "Content-Type": "application/json", - "X-Api-Key": token, - } - - response = requests.request("POST", url, headers=headers, data=payload) - - if response.status_code != 404: - print("Updated show in Sonarr") +import requests +import json +import configparser + +config = configparser.RawConfigParser() +config.read("config.cfg") + +token = config._sections["sonarr"]["token"] +base_url = config._sections["sonarr"]["url"] + + +def update_show_in_sonarr(show_id): + url = f"{base_url}/api/v3/command" + + payload = json.dumps({"name": "RefreshSeries", "seriesId": show_id}) + headers = { + "Content-Type": "application/json", + "X-Api-Key": token, + } + + 
response = requests.request("POST", url, headers=headers, data=payload) + + if response.status_code != 404: + print("Updated show in Sonarr") diff --git a/bazarr-ai-sub-generator/utils/whisper.py b/bazarr-ai-sub-generator/utils/whisper.py index 5e823b1..c7236c0 100644 --- a/bazarr-ai-sub-generator/utils/whisper.py +++ b/bazarr-ai-sub-generator/utils/whisper.py @@ -1,66 +1,66 @@ -import warnings -import faster_whisper -from tqdm import tqdm - - -# pylint: disable=R0903 -class WhisperAI: - """ - Wrapper class for the Whisper speech recognition model with additional functionality. - - This class provides a high-level interface for transcribing audio files using the Whisper - speech recognition model. It encapsulates the model instantiation and transcription process, - allowing users to easily transcribe audio files and iterate over the resulting segments. - - Usage: - ```python - whisper = WhisperAI(model_args, transcribe_args) - - # Transcribe an audio file and iterate over the segments - for segment in whisper.transcribe(audio_path): - # Process each transcription segment - print(segment) - ``` - - Args: - - model_args: Arguments to pass to WhisperModel initialize method - - model_size_or_path (str): The name of the Whisper model to use. - - device (str): The device to use for computation ("cpu", "cuda", "auto"). - - compute_type (str): The type to use for computation. - See https://opennmt.net/CTranslate2/quantization.html. - - transcribe_args (dict): Additional arguments to pass to the transcribe method. - - Attributes: - - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model. - - transcribe_args (dict): Additional arguments used for transcribe method. - - Methods: - - transcribe(audio_path): Transcribes an audio file and yields the resulting segments. 
- """ - - def __init__(self, model_args: dict, transcribe_args: dict): - self.model = faster_whisper.WhisperModel(**model_args) - self.transcribe_args = transcribe_args - - def transcribe(self, audio_path: str): - """ - Transcribes the specified audio file and yields the resulting segments. - - Args: - - audio_path (str): The path to the audio file for transcription. - - Yields: - - faster_whisper.TranscriptionSegment: An individual transcription segment. - """ - warnings.filterwarnings("ignore") - segments, info = self.model.transcribe(audio_path, **self.transcribe_args) - warnings.filterwarnings("default") - - # Same precision as the Whisper timestamps. - total_duration = round(info.duration, 2) - - with tqdm(total=total_duration, unit=" seconds") as pbar: - for segment in segments: - yield segment - pbar.update(segment.end - segment.start) - pbar.update(0) +import warnings +import faster_whisper +from tqdm import tqdm + + +# pylint: disable=R0903 +class WhisperAI: + """ + Wrapper class for the Whisper speech recognition model with additional functionality. + + This class provides a high-level interface for transcribing audio files using the Whisper + speech recognition model. It encapsulates the model instantiation and transcription process, + allowing users to easily transcribe audio files and iterate over the resulting segments. + + Usage: + ```python + whisper = WhisperAI(model_args, transcribe_args) + + # Transcribe an audio file and iterate over the segments + for segment in whisper.transcribe(audio_path): + # Process each transcription segment + print(segment) + ``` + + Args: + - model_args: Arguments to pass to WhisperModel initialize method + - model_size_or_path (str): The name of the Whisper model to use. + - device (str): The device to use for computation ("cpu", "cuda", "auto"). + - compute_type (str): The type to use for computation. + See https://opennmt.net/CTranslate2/quantization.html. 
+ - transcribe_args (dict): Additional arguments to pass to the transcribe method. + + Attributes: + - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model. + - transcribe_args (dict): Additional arguments used for transcribe method. + + Methods: + - transcribe(audio_path): Transcribes an audio file and yields the resulting segments. + """ + + def __init__(self, model_args: dict, transcribe_args: dict): + self.model = faster_whisper.WhisperModel(**model_args) + self.transcribe_args = transcribe_args + + def transcribe(self, audio_path: str): + """ + Transcribes the specified audio file and yields the resulting segments. + + Args: + - audio_path (str): The path to the audio file for transcription. + + Yields: + - faster_whisper.TranscriptionSegment: An individual transcription segment. + """ + warnings.filterwarnings("ignore") + segments, info = self.model.transcribe(audio_path, **self.transcribe_args) + warnings.filterwarnings("default") + + # Same precision as the Whisper timestamps. 
+ total_duration = round(info.duration, 2) + + with tqdm(total=total_duration, unit=" seconds") as pbar: + for segment in segments: + yield segment + pbar.update(segment.end - segment.start) + pbar.update(0) diff --git a/config.cfg.example b/config.cfg.example index c51ce0f..e67dfe9 100644 --- a/config.cfg.example +++ b/config.cfg.example @@ -1,6 +1,6 @@ -[bazarr] -url = http://1.1.1.1 -token = djfkjadncdfjkanvfjkvandfj -[sonarr] -url = http://2.2.2.2:8989 +[bazarr] +url = http://1.1.1.1 +token = djfkjadncdfjkanvfjkvandfj +[sonarr] +url = http://2.2.2.2:8989 token = dfifdmnajcdnjcvaldnjlk \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index eab95da..494c2c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -faster-whisper==0.10.0 -tqdm==4.56.0 +faster-whisper==0.10.0 +tqdm==4.56.0 ffmpeg-python==0.2.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 337aa84..c958418 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,19 @@ -from setuptools import setup, find_packages - -setup( - version="1.0", - name="bazarr-ai-sub-generator", - packages=find_packages(), - py_modules=["bazarr-ai-sub-generator"], - author="Karl Hudgell", - install_requires=[ - 'faster-whisper', - 'tqdm', - 'ffmpeg-python' - ], - description="Automatically generate and embed subtitles into your videos", - entry_points={ - 'console_scripts': ['bazarr-ai-sub-generator=bazarr-ai-sub-generator.cli:main'], - }, - include_package_data=True, -) +from setuptools import setup, find_packages + +setup( + version="1.0", + name="bazarr-ai-sub-generator", + packages=find_packages(), + py_modules=["bazarr-ai-sub-generator"], + author="Karl Hudgell", + install_requires=[ + 'faster-whisper', + 'tqdm', + 'ffmpeg-python' + ], + description="Automatically generate and embed subtitles into your videos", + entry_points={ + 'console_scripts': ['bazarr-ai-sub-generator=bazarr-ai-sub-generator.cli:main'], + }, + include_package_data=True, +)