latest commit

Karl 2025-10-19 11:53:37 +01:00
parent 77b28df03d
commit 5b27fdbc75
17 changed files with 783 additions and 720 deletions

.github/workflows/pylint.yml

@@ -1,24 +1,24 @@
name: Pylint
on: [push]
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint
          pip install -r requirements.txt
      - name: Analysing the code with pylint
        run: |
          pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py')

.gitignore

@@ -1,9 +1,9 @@
dist
.DS_Store
*.egg-info
build
__pycache__
venv/
test/
.vscode/launch.json
config.cfg

.vscode/launch.json

@@ -1,20 +1,20 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": false,
            "args": [
                "--model",
                "base",
            ],
        }
    ]
}

LICENSE

@@ -1,22 +1,22 @@
MIT License

Copyright (c) 2022-2024 Miguel Piedrafita <soy@miguelpiedrafita.com>
Copyright (c) 2024 Sergey Chernyaev <schernyae@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md

@@ -1,50 +1,50 @@
# bazarr-ai-sub-generator

This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using the [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation.

This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video.

This script connects to your Bazarr instance to get a list of shows that need subtitles, then processes each video to create (by default English) subtitles, which are written to the file as soft subtitles. It then sends an update to Sonarr and, once that is done, updates the file in Bazarr and moves on to the next file.

Clunky and slow, but it works.

## Installation
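The script reads its settings from a `config.cfg` in the working directory (see the sample config file later in this commit). A minimal sketch: the URLs and API keys below are placeholders for your own Bazarr and Sonarr details.

    [bazarr]
    url = http://192.168.1.10:6767
    token = your-bazarr-api-key

    [sonarr]
    url = http://192.168.1.10:8989
    token = your-sonarr-api-key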
## Usage

<!-- The following command will generate a `subtitled/video.mp4` file containing the input video with overlaid subtitles.

    faster_auto_subtitle /path/to/video.mp4 -o subtitled/

The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`.

    faster_auto_subtitle /path/to/video.mp4 --model medium

Adding `--task translate` will translate the subtitles into English:

    faster_auto_subtitle /path/to/video.mp4 --task translate

Run the following to view all available options:

    faster_auto_subtitle --help

## Tips

The tool also exposes a couple of model parameters that you can tweak to increase accuracy.

A higher `beam_size` usually leads to greater accuracy but slows down the process.

Setting a higher `no_speech_threshold` can be useful for videos with a lot of background noise, to stop Whisper from "hallucinating" subtitles for it.

In my experience, setting `condition_on_previous_text` to `False` dramatically increases accuracy for videos like TV shows with an intro song at the start.

You can use the `sample_interval` parameter to generate subtitles for a portion of the video while playing around with those parameters:

    faster_auto_subtitle /path/to/video.mp4 --model medium --sample_interval 00:05:30-00:07:00 --condition_on_previous_text False --beam_size 6 --no_speech_threshold 0.7

## License

This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. -->

cli.py

@@ -1,99 +1,106 @@
import argparse

from faster_whisper import available_models
from utils.constants import LANGUAGE_CODES
from main import process
from utils.convert import str2bool, str2timeinterval


def main():
    """
    Main entry point for the script.

    Parses command line arguments, processes the inputs using the specified options,
    and performs transcription or translation based on the specified task.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--audio_channel", default="0", type=int, help="audio channel index to use"
    )
    parser.add_argument(
        "--sample_interval",
        type=str2timeinterval,
        default=None,
        help="generate subtitles for a specific \
              fragment of the video (e.g. 01:02:05-01:03:45)",
    )
    parser.add_argument(
        "--model",
        default="small",
        choices=available_models(),
        help="name of the Whisper model to use",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        choices=["cpu", "cuda", "auto"],
        help='Device to use for computation ("cpu", "cuda", "auto")',
    )
    parser.add_argument(
        "--compute_type",
        type=str,
        default="default",
        choices=[
            "int8",
            "int8_float32",
            "int8_float16",
            "int8_bfloat16",
            "int16",
            "float16",
            "bfloat16",
            "float32",
        ],
        help="Type to use for computation. \
              See https://opennmt.net/CTranslate2/quantization.html.",
    )
    parser.add_argument(
        "--beam_size",
        type=int,
        default=5,
        help="model parameter, tweak to increase accuracy",
    )
    parser.add_argument(
        "--no_speech_threshold",
        type=float,
        default=0.6,
        help="model parameter, tweak to increase accuracy",
    )
    parser.add_argument(
        "--condition_on_previous_text",
        type=str2bool,
        default=True,
        help="model parameter, tweak to increase accuracy",
    )
    parser.add_argument(
        "--task",
        type=str,
        default="transcribe",
        choices=["transcribe", "translate"],
        help="whether to perform X->X speech recognition ('transcribe') \
              or X->English translation ('translate')",
    )
    parser.add_argument(
        "--language",
        type=str,
        default="auto",
        choices=LANGUAGE_CODES,
        help="What is the origin language of the video? \
              If unset, it is detected automatically.",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of concurrent workers for processing episodes. \
              Increase for better CUDA utilization with multiple episodes.",
    )

    args = parser.parse_args().__dict__

    process(args)


if __name__ == "__main__":
    main()
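The new `--workers` flag is passed through to `process()` in main.py below. Assuming the `bazarr-ai-sub-generator` console script from setup.py is installed, an invocation using it might look like this (the flag values are illustrative):

    bazarr-ai-sub-generator --model base --workers 2 --condition_on_previous_text False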

main.py

@@ -1,66 +1,122 @@
import os
import warnings
import tempfile
import time
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from utils.files import filename, write_srt
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
from utils.sonarr import update_show_in_sonarr
from utils.whisper import WhisperAI


def process_episode(episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes):
    """Process a single episode for subtitle generation."""
    episode_id = episode["sonarrEpisodeId"]
    try:
        # Double-check that this episode is still wanted before processing
        current_wanted = get_wanted_episodes()
        still_wanted = any(ep["sonarrEpisodeId"] == episode_id for ep in current_wanted["data"])

        if not still_wanted:
            processing_episodes.discard(episode_id)
            return f"Skipped (no longer wanted): {episode['seriesTitle']} - {episode['episode_number']}"

        print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
        episode_data = get_episode_details(episode_id)
        audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
        subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
        add_subtitles_to_mp4(subtitles)
        update_show_in_sonarr(episode["sonarrSeriesId"])
        time.sleep(5)
        sync_series()

        processing_episodes.discard(episode_id)
        completed_episodes.append(episode_id)
        return f"Completed: {episode['seriesTitle']} - {episode['episode_number']}"
    except Exception as e:
        processing_episodes.discard(episode_id)
        return f"Failed {episode['seriesTitle']} - {episode['episode_number']}: {str(e)}"


def process(args: dict):
    model_name: str = args.pop("model")
    language: str = args.pop("language")
    sample_interval: str = args.pop("sample_interval")
    audio_channel: str = args.pop("audio_channel")
    workers: int = args.pop("workers", 1)

    if model_name.endswith(".en"):
        warnings.warn(
            f"{model_name} is an English-only model, forcing English detection."
        )
        args["language"] = "en"
    # if translate task used and language argument is set, then use it
    elif language != "auto":
        args["language"] = language

    model_args = {}
    model_args["model_size_or_path"] = model_name
    model_args["device"] = args.pop("device")
    model_args["compute_type"] = args.pop("compute_type")

    list_of_episodes_needing_subtitles = get_wanted_episodes()
    print(
        f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
    )
    print(f"Processing with {workers} concurrent worker(s)...")

    # Thread-safe tracking of episodes being processed and completed
    processing_episodes = set()
    completed_episodes_list = []
    total_episodes = len(list_of_episodes_needing_subtitles["data"])

    # Filter episodes to avoid duplicates and respect concurrent processing limits
    episodes_to_process = []
    for episode in list_of_episodes_needing_subtitles["data"]:
        episode_id = episode["sonarrEpisodeId"]
        if episode_id not in processing_episodes:
            processing_episodes.add(episode_id)
            episodes_to_process.append(episode)

    print(f"Starting processing of {len(episodes_to_process)} unique episodes...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        # Submit episodes for processing with tracking sets
        future_to_episode = {
            executor.submit(process_episode, episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes_list): episode
            for episode in episodes_to_process
        }
        # Collect results as they complete
        completed_count = 0
        for future in as_completed(future_to_episode):
            completed_count += 1
            result = future.result()
            print(f"[{completed_count}/{total_episodes}] {result}")

    print(f"Processing complete. {len(completed_episodes_list)} episodes processed successfully.")


def get_subtitles(
    audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
):
    model = WhisperAI(model_args, transcribe_args)

    subtitles_path = {}

    for path, audio_path in audio_paths.items():
        print(f"Generating subtitles for {filename(path)}... This might take a while.")
        srt_path = os.path.join(output_dir, f"{filename(path)}.srt")

        segments = model.transcribe(audio_path)

        with open(srt_path, "w", encoding="utf-8") as srt:
            write_srt(segments, file=srt)

        subtitles_path[path] = srt_path

    return subtitles_path

utils/bazarr.py

@@ -1,40 +1,40 @@
import requests
import configparser

config = configparser.RawConfigParser()
config.read("config.cfg")
token = config._sections["bazarr"]["token"]
base_url = config._sections["bazarr"]["url"]


def get_wanted_episodes():
    url = f"{base_url}/api/episodes/wanted"

    payload = {}
    headers = {"accept": "application/json", "X-API-KEY": token}

    response = requests.request("GET", url, headers=headers, data=payload)
    return response.json()


def get_episode_details(episode_id: str):
    url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}"

    payload = {}
    headers = {"accept": "application/json", "X-API-KEY": token}

    response = requests.request("GET", url, headers=headers, data=payload)
    return response.json()["data"][0]


def sync_series():
    url = f"{base_url}/api/system/tasks?taskid=update_series"

    payload = {}
    headers = {"accept": "application/json", "X-API-KEY": token}

    response = requests.request("POST", url, headers=headers, data=payload)
    if response.status_code == 204:
        print("Updated Bazarr")

utils/constants.py

@@ -1,105 +1,105 @@
""" """
List of available language codes List of available language codes
""" """
LANGUAGE_CODES = [ LANGUAGE_CODES = [
"af", "af",
"am", "am",
"ar", "ar",
"as", "as",
"az", "az",
"ba", "ba",
"be", "be",
"bg", "bg",
"bn", "bn",
"bo", "bo",
"br", "br",
"bs", "bs",
"ca", "ca",
"cs", "cs",
"cy", "cy",
"da", "da",
"de", "de",
"el", "el",
"en", "en",
"es", "es",
"et", "et",
"eu", "eu",
"fa", "fa",
"fi", "fi",
"fo", "fo",
"fr", "fr",
"gl", "gl",
"gu", "gu",
"ha", "ha",
"haw", "haw",
"he", "he",
"hi", "hi",
"hr", "hr",
"ht", "ht",
"hu", "hu",
"hy", "hy",
"id", "id",
"is", "is",
"it", "it",
"ja", "ja",
"jw", "jw",
"ka", "ka",
"kk", "kk",
"km", "km",
"kn", "kn",
"ko", "ko",
"la", "la",
"lb", "lb",
"ln", "ln",
"lo", "lo",
"lt", "lt",
"lv", "lv",
"mg", "mg",
"mi", "mi",
"mk", "mk",
"ml", "ml",
"mn", "mn",
"mr", "mr",
"ms", "ms",
"mt", "mt",
"my", "my",
"ne", "ne",
"nl", "nl",
"nn", "nn",
"no", "no",
"oc", "oc",
"pa", "pa",
"pl", "pl",
"ps", "ps",
"pt", "pt",
"ro", "ro",
"ru", "ru",
"sa", "sa",
"sd", "sd",
"si", "si",
"sk", "sk",
"sl", "sl",
"sn", "sn",
"so", "so",
"sq", "sq",
"sr", "sr",
"su", "su",
"sv", "sv",
"sw", "sw",
"ta", "ta",
"te", "te",
"tg", "tg",
"th", "th",
"tk", "tk",
"tl", "tl",
"tr", "tr",
"tt", "tt",
"uk", "uk",
"ur", "ur",
"uz", "uz",
"vi", "vi",
"yi", "yi",
"yo", "yo",
"zh", "zh",
"yue", "yue",
] ]

utils/convert.py

@@ -1,92 +1,92 @@
from datetime import datetime, timedelta


def str2bool(string: str):
    string = string.lower()
    str2val = {"true": True, "false": False}

    if string in str2val:
        return str2val[string]

    raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")


def str2timeinterval(string: str):
    if string is None:
        return None

    if "-" not in string:
        raise ValueError(
            f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
        )

    intervals = string.split("-")
    if len(intervals) != 2:
        raise ValueError(
            f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
        )

    start = try_parse_timestamp(intervals[0])
    end = try_parse_timestamp(intervals[1])
    if start >= end:
        raise ValueError(
            f"Expected time interval end to be higher than start, got {start} >= {end}"
        )

    return [start, end]


def time_to_timestamp(string: str):
    split_time = string.split(":")
    if (
        len(split_time) == 0
        or len(split_time) > 3
        or not all(x.isdigit() for x in split_time)
    ):
        raise ValueError(f"Expected HH:mm:ss or HH:mm or ss, got {string}")

    if len(split_time) == 1:
        return int(split_time[0])

    if len(split_time) == 2:
        return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60

    return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 + int(split_time[2])


def try_parse_timestamp(string: str):
    timestamp = parse_timestamp(string, "%H:%M:%S")
    if timestamp is not None:
        return timestamp

    timestamp = parse_timestamp(string, "%H:%M")
    if timestamp is not None:
        return timestamp

    return parse_timestamp(string, "%S")


def parse_timestamp(string: str, pattern: str):
    try:
        date = datetime.strptime(string, pattern)
        delta = timedelta(hours=date.hour, minutes=date.minute, seconds=date.second)
        return int(delta.total_seconds())
    except:  # pylint: disable=bare-except
        return None


def format_timestamp(seconds: float, always_include_hours: bool = False):
    assert seconds >= 0, "non-negative timestamp expected"
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    seconds = milliseconds // 1_000
    milliseconds -= seconds * 1_000

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}"

utils/ffmpeg.py

@@ -1,59 +1,59 @@
import os
import tempfile
import ffmpeg
from .files import filename


def get_audio(paths: list, audio_channel_index: int, sample_interval: list):
    temp_dir = tempfile.gettempdir()

    audio_paths = {}

    for path in paths:
        print(f"Extracting audio from {filename(path)}...")
        output_path = os.path.join(temp_dir, f"{filename(path)}.wav")

        ffmpeg_input_args = {}
        if sample_interval is not None:
            ffmpeg_input_args["ss"] = str(sample_interval[0])

        ffmpeg_output_args = {}
        ffmpeg_output_args["acodec"] = "pcm_s16le"
        ffmpeg_output_args["ac"] = "1"
        ffmpeg_output_args["ar"] = "16k"
        ffmpeg_output_args["map"] = "0:a:" + str(audio_channel_index)
        if sample_interval is not None:
            ffmpeg_output_args["t"] = str(sample_interval[1] - sample_interval[0])

        ffmpeg.input(path, **ffmpeg_input_args).output(
            output_path, **ffmpeg_output_args
        ).run(quiet=True, overwrite_output=True)

        audio_paths[path] = output_path

    return audio_paths


def add_subtitles_to_mp4(subtitles: dict):
    input_file = list(subtitles.keys())[0]
    subtitle_file = subtitles[input_file]
    output_file = input_file

    os.rename(input_file, input_file + "_edit")

    input_stream = ffmpeg.input(input_file + "_edit")
    subtitle_stream = ffmpeg.input(subtitle_file)

    # Combine input video and subtitle
    output = ffmpeg.output(
        input_stream,
        subtitle_stream,
        output_file.replace(".mkv", ".mp4"),
        c="copy",
        **{"c:s": "mov_text"},
        **{"metadata:s:s:0": "language=eng"},
    )

    ffmpeg.run(output, quiet=True, overwrite_output=True)
    os.remove(input_file + "_edit")

    # remove tempfiles
    os.remove(subtitle_file)
    os.remove(subtitle_file.replace(".srt", ".wav"))
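For a `sample_interval` of `[330, 420]` (i.e. 00:05:30-00:07:00) and audio channel 0, `get_audio` assembles roughly the following command (paths illustrative, argument order approximate), producing the 16 kHz mono 16-bit WAV that Whisper expects:

    ffmpeg -ss 330 -i /path/to/episode.mkv -acodec pcm_s16le -ac 1 -ar 16k -map 0:a:0 -t 90 /tmp/episode.wav -y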

utils/files.py

@@ -1,19 +1,19 @@
import os
from typing import Iterator, TextIO
from .convert import format_timestamp


def write_srt(transcript: Iterator[dict], file: TextIO):
    for i, segment in enumerate(transcript, start=1):
        print(
            f"{i}\n"
            f"{format_timestamp(segment.start, always_include_hours=True)} --> "
            f"{format_timestamp(segment.end, always_include_hours=True)}\n"
            f"{segment.text.strip().replace('-->', '->')}\n",
            file=file,
            flush=True,
        )


def filename(path: str):
    return os.path.splitext(os.path.basename(path))[0]

utils/sonarr.py

@@ -1,24 +1,24 @@
import requests
import json
import configparser

config = configparser.RawConfigParser()
config.read("config.cfg")
token = config._sections["sonarr"]["token"]
base_url = config._sections["sonarr"]["url"]


def update_show_in_sonarr(show_id):
    url = f"{base_url}/api/v3/command"

    payload = json.dumps({"name": "RefreshSeries", "seriesId": show_id})
    headers = {
        "Content-Type": "application/json",
        "X-Api-Key": token,
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    if response.status_code != 404:
        print("Updated show in Sonarr")

utils/whisper.py

@@ -1,66 +1,66 @@
import warnings
import faster_whisper
from tqdm import tqdm


# pylint: disable=R0903
class WhisperAI:
    """
    Wrapper class for the Whisper speech recognition model with additional functionality.

    This class provides a high-level interface for transcribing audio files using the Whisper
    speech recognition model. It encapsulates the model instantiation and transcription process,
    allowing users to easily transcribe audio files and iterate over the resulting segments.

    Usage:
    ```python
    whisper = WhisperAI(model_args, transcribe_args)

    # Transcribe an audio file and iterate over the segments
    for segment in whisper.transcribe(audio_path):
        # Process each transcription segment
        print(segment)
    ```

    Args:
    - model_args: Arguments to pass to WhisperModel initialize method
        - model_size_or_path (str): The name of the Whisper model to use.
        - device (str): The device to use for computation ("cpu", "cuda", "auto").
        - compute_type (str): The type to use for computation.
          See https://opennmt.net/CTranslate2/quantization.html.
    - transcribe_args (dict): Additional arguments to pass to the transcribe method.

    Attributes:
    - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
    - transcribe_args (dict): Additional arguments used for transcribe method.

    Methods:
    - transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
    """

    def __init__(self, model_args: dict, transcribe_args: dict):
        self.model = faster_whisper.WhisperModel(**model_args)
        self.transcribe_args = transcribe_args

    def transcribe(self, audio_path: str):
        """
        Transcribes the specified audio file and yields the resulting segments.

        Args:
        - audio_path (str): The path to the audio file for transcription.

        Yields:
        - faster_whisper.TranscriptionSegment: An individual transcription segment.
        """
        warnings.filterwarnings("ignore")
        segments, info = self.model.transcribe(audio_path, **self.transcribe_args)
        warnings.filterwarnings("default")

        # Same precision as the Whisper timestamps.
        total_duration = round(info.duration, 2)

        with tqdm(total=total_duration, unit=" seconds") as pbar:
            for segment in segments:
                yield segment
                pbar.update(segment.end - segment.start)
            pbar.update(0)

@@ -1,6 +1,6 @@
[bazarr]
url = http://1.1.1.1
token = djfkjadncdfjkanvfjkvandfj

[sonarr]
url = http://2.2.2.2:8989
token = dfifdmnajcdnjcvaldnjlk

requirements.txt

@@ -1,3 +1,3 @@
faster-whisper==0.10.0
tqdm==4.56.0
ffmpeg-python==0.2.0

setup.py

@@ -1,19 +1,19 @@
from setuptools import setup, find_packages

setup(
    version="1.0",
    name="bazarr-ai-sub-generator",
    packages=find_packages(),
    py_modules=["bazarr-ai-sub-generator"],
    author="Karl Hudgell",
    install_requires=[
        'faster-whisper',
        'tqdm',
        'ffmpeg-python'
    ],
    description="Automatically generate and embed subtitles into your videos",
    entry_points={
        'console_scripts': ['bazarr-ai-sub-generator=bazarr-ai-sub-generator.cli:main'],
    },
    include_package_data=True,
)