latest commit

Karl 2025-10-19 11:53:37 +01:00
parent 77b28df03d
commit 5b27fdbc75
17 changed files with 783 additions and 720 deletions

View File

@@ -1,24 +1,24 @@
name: Pylint
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
pip install -r requirements.txt
- name: Analysing the code with pylint
run: |
pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py')

18
.gitignore vendored
View File

@@ -1,9 +1,9 @@
dist
.DS_Store
*.egg-info
build
__pycache__
venv/
test/
.vscode/launch.json
config.cfg

38
.vscode/launch.json vendored
View File

@@ -1,20 +1,20 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false,
"args": [
"--model",
"base",
],
}
]
}

44
LICENSE
View File

@@ -1,22 +1,22 @@
MIT License
Copyright (c) 2022-2024 Miguel Piedrafita <soy@miguelpiedrafita.com>
Copyright (c) 2024 Sergey Chernyaev <schernyae@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

100
README.md
View File

@@ -1,50 +1,50 @@
# bazarr-ai-sub-generator
This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using the [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation.
This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video.
This script connects to your Bazarr instance, fetches the list of episodes that still need subtitles, and processes each video to create subtitles (English by default), which are then written into the file as soft subtitles.
It then sends an update to Sonarr and, once that is done, updates the file in Bazarr and moves on to the next file.
Clunky and slow, but it works.
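In outline, one pass over the library looks like the sketch below: a condensed, illustrative rewrite of the loop in `main.py` (the supported entry point is the CLI in `cli.py`; error handling and the optional worker pool are omitted, and the model/transcribe settings shown are example values only).

```python
import tempfile

from main import get_subtitles
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
from utils.sonarr import update_show_in_sonarr

# Example settings; cli.py builds these dicts from its command-line arguments.
model_args = {"model_size_or_path": "small", "device": "auto", "compute_type": "default"}
transcribe_args = {"task": "transcribe", "beam_size": 5}

wanted = get_wanted_episodes()                        # ask Bazarr which episodes still need subtitles
for episode in wanted["data"]:
    details = get_episode_details(episode["sonarrEpisodeId"])
    audios = get_audio([details["path"]], 0, None)    # extract mono 16 kHz WAV via ffmpeg
    subs = get_subtitles(audios, tempfile.gettempdir(), model_args, transcribe_args)  # Whisper -> .srt
    add_subtitles_to_mp4(subs)                        # remux the .srt back in as a soft subtitle track
    update_show_in_sonarr(episode["sonarrSeriesId"])  # have Sonarr refresh the series
    sync_series()                                     # then have Bazarr rescan
```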
## Installation
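Connection details for Bazarr and Sonarr are read from a `config.cfg` in the working directory (see `utils/bazarr.py` and `utils/sonarr.py`). A minimal sketch of what the code expects, with placeholder values:

```ini
[bazarr]
url = http://<bazarr-host>
token = <bazarr-api-key>

[sonarr]
url = http://<sonarr-host>:8989
token = <sonarr-api-key>
```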
## Usage
<!-- The following command will generate a `subtitled/video.mp4` file containing the input video with overlaid subtitles.
faster_auto_subtitle /path/to/video.mp4 -o subtitled/
The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`.
faster_auto_subtitle /path/to/video.mp4 --model medium
Adding `--task translate` will translate the subtitles into English:
faster_auto_subtitle /path/to/video.mp4 --task translate
Run the following to view all available options:
faster_auto_subtitle --help
## Tips
The tool also exposes a couple of model parameters that you can tweak to increase accuracy.
Higher `beam_size` usually leads to greater accuracy, but slows down the process.
Setting a higher `no_speech_threshold` can be useful for videos with a lot of background noise, to stop Whisper from "hallucinating" subtitles for it.
In my experience, setting the option `condition_on_previous_text` to `False` dramatically increases accuracy for videos like TV shows with an intro song at the start.
You can use the `sample_interval` parameter to generate subtitles for a portion of the video while experimenting with those parameters:
faster_auto_subtitle /path/to/video.mp4 --model medium --sample_interval 00:05:30-00:07:00 --condition_on_previous_text False --beam_size 6 --no_speech_threshold 0.7
## License
This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. -->

View File

@@ -1,99 +1,106 @@
import argparse
from faster_whisper import available_models
from utils.constants import LANGUAGE_CODES
from main import process
from utils.convert import str2bool, str2timeinterval
def main():
"""
Main entry point for the script.
Parses command line arguments, processes the inputs using the specified options,
and performs transcription or translation based on the specified task.
"""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--audio_channel", default="0", type=int, help="audio channel index to use"
)
parser.add_argument(
"--sample_interval",
type=str2timeinterval,
default=None,
help="generate subtitles for a specific \
fragment of the video (e.g. 01:02:05-01:03:45)",
)
parser.add_argument(
"--model",
default="small",
choices=available_models(),
help="name of the Whisper model to use",
)
parser.add_argument(
"--device",
type=str,
default="auto",
choices=["cpu", "cuda", "auto"],
help='Device to use for computation ("cpu", "cuda", "auto")',
)
parser.add_argument(
"--compute_type",
type=str,
default="default",
choices=[
"int8",
"int8_float32",
"int8_float16",
"int8_bfloat16",
"int16",
"float16",
"bfloat16",
"float32",
],
help="Type to use for computation. \
See https://opennmt.net/CTranslate2/quantization.html.",
)
parser.add_argument(
"--beam_size",
type=int,
default=5,
help="model parameter, tweak to increase accuracy",
)
parser.add_argument(
"--no_speech_threshold",
type=float,
default=0.6,
help="model parameter, tweak to increase accuracy",
)
parser.add_argument(
"--condition_on_previous_text",
type=str2bool,
default=True,
help="model parameter, tweak to increase accuracy",
)
parser.add_argument(
"--task",
type=str,
default="transcribe",
choices=["transcribe", "translate"],
help="whether to perform X->X speech recognition ('transcribe') \
or X->English translation ('translate')",
)
parser.add_argument(
"--language",
type=str,
default="auto",
choices=LANGUAGE_CODES,
help="What is the origin language of the video? \
If unset, it is detected automatically.",
)
args = parser.parse_args().__dict__
process(args)
if __name__ == "__main__":
main()
import argparse
from faster_whisper import available_models
from utils.constants import LANGUAGE_CODES
from main import process
from utils.convert import str2bool, str2timeinterval
def main():
"""
Main entry point for the script.
Parses command line arguments, processes the inputs using the specified options,
and performs transcription or translation based on the specified task.
"""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--audio_channel", default="0", type=int, help="audio channel index to use"
)
parser.add_argument(
"--sample_interval",
type=str2timeinterval,
default=None,
help="generate subtitles for a specific \
fragment of the video (e.g. 01:02:05-01:03:45)",
)
parser.add_argument(
"--model",
default="small",
choices=available_models(),
help="name of the Whisper model to use",
)
parser.add_argument(
"--device",
type=str,
default="auto",
choices=["cpu", "cuda", "auto"],
help='Device to use for computation ("cpu", "cuda", "auto")',
)
parser.add_argument(
"--compute_type",
type=str,
default="default",
choices=[
"int8",
"int8_float32",
"int8_float16",
"int8_bfloat16",
"int16",
"float16",
"bfloat16",
"float32",
],
help="Type to use for computation. \
See https://opennmt.net/CTranslate2/quantization.html.",
)
parser.add_argument(
"--beam_size",
type=int,
default=5,
help="model parameter, tweak to increase accuracy",
)
parser.add_argument(
"--no_speech_threshold",
type=float,
default=0.6,
help="model parameter, tweak to increase accuracy",
)
parser.add_argument(
"--condition_on_previous_text",
type=str2bool,
default=True,
help="model parameter, tweak to increase accuracy",
)
parser.add_argument(
"--task",
type=str,
default="transcribe",
choices=["transcribe", "translate"],
help="whether to perform X->X speech recognition ('transcribe') \
or X->English translation ('translate')",
)
parser.add_argument(
"--language",
type=str,
default="auto",
choices=LANGUAGE_CODES,
help="What is the origin language of the video? \
If unset, it is detected automatically.",
)
parser.add_argument(
"--workers",
type=int,
default=1,
help="Number of concurrent workers for processing episodes. \
Increase for better CUDA utilization with multiple episodes.",
)
args = parser.parse_args().__dict__
process(args)
if __name__ == "__main__":
main()

View File

@@ -1,66 +1,122 @@
import os
import warnings
import tempfile
import time
from utils.files import filename, write_srt
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
from utils.sonarr import update_show_in_sonarr
from utils.whisper import WhisperAI
def process(args: dict):
model_name: str = args.pop("model")
language: str = args.pop("language")
sample_interval: str = args.pop("sample_interval")
audio_channel: str = args.pop("audio_channel")
if model_name.endswith(".en"):
warnings.warn(
f"{model_name} is an English-only model, forcing English detection."
)
args["language"] = "en"
# if translate task used and language argument is set, then use it
elif language != "auto":
args["language"] = language
model_args = {}
model_args["model_size_or_path"] = model_name
model_args["device"] = args.pop("device")
model_args["compute_type"] = args.pop("compute_type")
list_of_episodes_needing_subtitles = get_wanted_episodes()
print(
f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
)
for episode in list_of_episodes_needing_subtitles["data"]:
print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
episode_data = get_episode_details(episode["sonarrEpisodeId"])
audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
add_subtitles_to_mp4(subtitles)
update_show_in_sonarr(episode["sonarrSeriesId"])
time.sleep(5)
sync_series()
def get_subtitles(
audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
):
model = WhisperAI(model_args, transcribe_args)
subtitles_path = {}
for path, audio_path in audio_paths.items():
print(f"Generating subtitles for {filename(path)}... This might take a while.")
srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
segments = model.transcribe(audio_path)
with open(srt_path, "w", encoding="utf-8") as srt:
write_srt(segments, file=srt)
subtitles_path[path] = srt_path
return subtitles_path
import os
import warnings
import tempfile
import time
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from utils.files import filename, write_srt
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
from utils.sonarr import update_show_in_sonarr
from utils.whisper import WhisperAI
def process_episode(episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes):
"""Process a single episode for subtitle generation."""
episode_id = episode["sonarrEpisodeId"]
try:
# Double-check that this episode is still wanted before processing
current_wanted = get_wanted_episodes()
still_wanted = any(ep["sonarrEpisodeId"] == episode_id for ep in current_wanted["data"])
if not still_wanted:
processing_episodes.discard(episode_id)
return f"Skipped (no longer wanted): {episode['seriesTitle']} - {episode['episode_number']}"
print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
episode_data = get_episode_details(episode_id)
audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
add_subtitles_to_mp4(subtitles)
update_show_in_sonarr(episode["sonarrSeriesId"])
time.sleep(5)
sync_series()
processing_episodes.discard(episode_id)
completed_episodes.append(episode_id)
return f"Completed: {episode['seriesTitle']} - {episode['episode_number']}"
except Exception as e:
processing_episodes.discard(episode_id)
return f"Failed {episode['seriesTitle']} - {episode['episode_number']}: {str(e)}"
def process(args: dict):
model_name: str = args.pop("model")
language: str = args.pop("language")
sample_interval: str = args.pop("sample_interval")
audio_channel: str = args.pop("audio_channel")
workers: int = args.pop("workers", 1)
if model_name.endswith(".en"):
warnings.warn(
f"{model_name} is an English-only model, forcing English detection."
)
args["language"] = "en"
# if translate task used and language argument is set, then use it
elif language != "auto":
args["language"] = language
model_args = {}
model_args["model_size_or_path"] = model_name
model_args["device"] = args.pop("device")
model_args["compute_type"] = args.pop("compute_type")
list_of_episodes_needing_subtitles = get_wanted_episodes()
print(
f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
)
print(f"Processing with {workers} concurrent worker(s)...")
# Thread-safe tracking of episodes being processed and completed
processing_episodes = set()
completed_episodes_list = []
total_episodes = len(list_of_episodes_needing_subtitles["data"])
# Filter episodes to avoid duplicates and respect concurrent processing limits
episodes_to_process = []
for episode in list_of_episodes_needing_subtitles["data"]:
episode_id = episode["sonarrEpisodeId"]
if episode_id not in processing_episodes:
processing_episodes.add(episode_id)
episodes_to_process.append(episode)
print(f"Starting processing of {len(episodes_to_process)} unique episodes...")
with ThreadPoolExecutor(max_workers=workers) as executor:
# Submit episodes for processing with tracking sets
future_to_episode = {
executor.submit(process_episode, episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes_list): episode
for episode in episodes_to_process
}
# Collect results as they complete
completed_count = 0
for future in as_completed(future_to_episode):
completed_count += 1
result = future.result()
print(f"[{completed_count}/{total_episodes}] {result}")
print(f"Processing complete. {len(completed_episodes_list)} episodes processed successfully.")
def get_subtitles(
audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
):
model = WhisperAI(model_args, transcribe_args)
subtitles_path = {}
for path, audio_path in audio_paths.items():
print(f"Generating subtitles for {filename(path)}... This might take a while.")
srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
segments = model.transcribe(audio_path)
with open(srt_path, "w", encoding="utf-8") as srt:
write_srt(segments, file=srt)
subtitles_path[path] = srt_path
return subtitles_path

View File

@@ -1,40 +1,40 @@
import requests
import configparser
config = configparser.RawConfigParser()
config.read("config.cfg")
token = config._sections["bazarr"]["token"]
base_url = config._sections["bazarr"]["url"]
def get_wanted_episodes():
url = f"{base_url}/api/episodes/wanted"
payload = {}
headers = {"accept": "application/json", "X-API-KEY": token}
response = requests.request("GET", url, headers=headers, data=payload)
return response.json()
def get_episode_details(episode_id: str):
url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}"
payload = {}
headers = {"accept": "application/json", "X-API-KEY": token}
response = requests.request("GET", url, headers=headers, data=payload)
return response.json()["data"][0]
def sync_series():
url = f"{base_url}/api/system/tasks?taskid=update_series"
payload = {}
headers = {"accept": "application/json", "X-API-KEY": token}
response = requests.request("POST", url, headers=headers, data=payload)
if response.status_code == 204:
print("Updated Bazarr")

View File

@@ -1,105 +1,105 @@
"""
List of available language codes
"""
LANGUAGE_CODES = [
"af",
"am",
"ar",
"as",
"az",
"ba",
"be",
"bg",
"bn",
"bo",
"br",
"bs",
"ca",
"cs",
"cy",
"da",
"de",
"el",
"en",
"es",
"et",
"eu",
"fa",
"fi",
"fo",
"fr",
"gl",
"gu",
"ha",
"haw",
"he",
"hi",
"hr",
"ht",
"hu",
"hy",
"id",
"is",
"it",
"ja",
"jw",
"ka",
"kk",
"km",
"kn",
"ko",
"la",
"lb",
"ln",
"lo",
"lt",
"lv",
"mg",
"mi",
"mk",
"ml",
"mn",
"mr",
"ms",
"mt",
"my",
"ne",
"nl",
"nn",
"no",
"oc",
"pa",
"pl",
"ps",
"pt",
"ro",
"ru",
"sa",
"sd",
"si",
"sk",
"sl",
"sn",
"so",
"sq",
"sr",
"su",
"sv",
"sw",
"ta",
"te",
"tg",
"th",
"tk",
"tl",
"tr",
"tt",
"uk",
"ur",
"uz",
"vi",
"yi",
"yo",
"zh",
"yue",
]
"""
List of available language codes
"""
LANGUAGE_CODES = [
"af",
"am",
"ar",
"as",
"az",
"ba",
"be",
"bg",
"bn",
"bo",
"br",
"bs",
"ca",
"cs",
"cy",
"da",
"de",
"el",
"en",
"es",
"et",
"eu",
"fa",
"fi",
"fo",
"fr",
"gl",
"gu",
"ha",
"haw",
"he",
"hi",
"hr",
"ht",
"hu",
"hy",
"id",
"is",
"it",
"ja",
"jw",
"ka",
"kk",
"km",
"kn",
"ko",
"la",
"lb",
"ln",
"lo",
"lt",
"lv",
"mg",
"mi",
"mk",
"ml",
"mn",
"mr",
"ms",
"mt",
"my",
"ne",
"nl",
"nn",
"no",
"oc",
"pa",
"pl",
"ps",
"pt",
"ro",
"ru",
"sa",
"sd",
"si",
"sk",
"sl",
"sn",
"so",
"sq",
"sr",
"su",
"sv",
"sw",
"ta",
"te",
"tg",
"th",
"tk",
"tl",
"tr",
"tt",
"uk",
"ur",
"uz",
"vi",
"yi",
"yo",
"zh",
"yue",
]

View File

@@ -1,92 +1,92 @@
from datetime import datetime, timedelta
def str2bool(string: str):
string = string.lower()
str2val = {"true": True, "false": False}
if string in str2val:
return str2val[string]
raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
def str2timeinterval(string: str):
if string is None:
return None
if "-" not in string:
raise ValueError(
f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
)
intervals = string.split("-")
if len(intervals) != 2:
raise ValueError(
f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
)
start = try_parse_timestamp(intervals[0])
end = try_parse_timestamp(intervals[1])
if start >= end:
raise ValueError(
f"Expected time interval end to be higher than start, got {start} >= {end}"
)
return [start, end]
def time_to_timestamp(string: str):
split_time = string.split(":")
if (
len(split_time) == 0
or len(split_time) > 3
or not all(x.isdigit() for x in split_time)
):
raise ValueError(f"Expected HH:mm:ss or HH:mm or ss, got {string}")
if len(split_time) == 1:
return int(split_time[0])
if len(split_time) == 2:
return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60
return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 + int(split_time[2])
def try_parse_timestamp(string: str):
timestamp = parse_timestamp(string, "%H:%M:%S")
if timestamp is not None:
return timestamp
timestamp = parse_timestamp(string, "%H:%M")
if timestamp is not None:
return timestamp
return parse_timestamp(string, "%S")
def parse_timestamp(string: str, pattern: str):
try:
date = datetime.strptime(string, pattern)
delta = timedelta(hours=date.hour, minutes=date.minute, seconds=date.second)
return int(delta.total_seconds())
except: # pylint: disable=bare-except
return None
def format_timestamp(seconds: float, always_include_hours: bool = False):
assert seconds >= 0, "non-negative timestamp expected"
milliseconds = round(seconds * 1000.0)
hours = milliseconds // 3_600_000
milliseconds -= hours * 3_600_000
minutes = milliseconds // 60_000
milliseconds -= minutes * 60_000
seconds = milliseconds // 1_000
milliseconds -= seconds * 1_000
hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}"

View File

@@ -1,59 +1,59 @@
import os
import tempfile
import ffmpeg
from .files import filename
def get_audio(paths: list, audio_channel_index: int, sample_interval: list):
temp_dir = tempfile.gettempdir()
audio_paths = {}
for path in paths:
print(f"Extracting audio from {filename(path)}...")
output_path = os.path.join(temp_dir, f"{filename(path)}.wav")
ffmpeg_input_args = {}
if sample_interval is not None:
ffmpeg_input_args["ss"] = str(sample_interval[0])
ffmpeg_output_args = {}
ffmpeg_output_args["acodec"] = "pcm_s16le"
ffmpeg_output_args["ac"] = "1"
ffmpeg_output_args["ar"] = "16k"
ffmpeg_output_args["map"] = "0:a:" + str(audio_channel_index)
if sample_interval is not None:
ffmpeg_output_args["t"] = str(sample_interval[1] - sample_interval[0])
ffmpeg.input(path, **ffmpeg_input_args).output(
output_path, **ffmpeg_output_args
).run(quiet=True, overwrite_output=True)
audio_paths[path] = output_path
return audio_paths
def add_subtitles_to_mp4(subtitles: dict):
input_file = list(subtitles.keys())[0]
subtitle_file = subtitles[input_file]
output_file = input_file
os.rename(input_file, input_file + "_edit")
input_stream = ffmpeg.input(input_file + "_edit")
subtitle_stream = ffmpeg.input(subtitle_file)
# Combine input video and subtitle
output = ffmpeg.output(
input_stream,
subtitle_stream,
output_file.replace(".mkv", ".mp4"),
c="copy",
**{"c:s": "mov_text"},
**{"metadata:s:s:0": "language=eng"},
)
ffmpeg.run(output, quiet=True, overwrite_output=True)
os.remove(input_file + "_edit")
# remove tempfiles
os.remove(subtitle_file)
os.remove(subtitle_file.replace(".srt", ".wav"))

View File

@@ -1,19 +1,19 @@
import os
from typing import Iterator, TextIO
from .convert import format_timestamp
def write_srt(transcript: Iterator[dict], file: TextIO):
for i, segment in enumerate(transcript, start=1):
print(
f"{i}\n"
f"{format_timestamp(segment.start, always_include_hours=True)} --> "
f"{format_timestamp(segment.end, always_include_hours=True)}\n"
f"{segment.text.strip().replace('-->', '->')}\n",
file=file,
flush=True,
)
def filename(path: str):
return os.path.splitext(os.path.basename(path))[0]

View File

@@ -1,24 +1,24 @@
import requests
import json
import configparser
config = configparser.RawConfigParser()
config.read("config.cfg")
token = config._sections["sonarr"]["token"]
base_url = config._sections["sonarr"]["url"]
def update_show_in_sonarr(show_id):
url = f"{base_url}/api/v3/command"
payload = json.dumps({"name": "RefreshSeries", "seriesId": show_id})
headers = {
"Content-Type": "application/json",
"X-Api-Key": token,
}
response = requests.request("POST", url, headers=headers, data=payload)
if response.status_code != 404:
print("Updated show in Sonarr")

View File

@@ -1,66 +1,66 @@
import warnings
import faster_whisper
from tqdm import tqdm
# pylint: disable=R0903
class WhisperAI:
"""
Wrapper class for the Whisper speech recognition model with additional functionality.
This class provides a high-level interface for transcribing audio files using the Whisper
speech recognition model. It encapsulates the model instantiation and transcription process,
allowing users to easily transcribe audio files and iterate over the resulting segments.
Usage:
```python
whisper = WhisperAI(model_args, transcribe_args)
# Transcribe an audio file and iterate over the segments
for segment in whisper.transcribe(audio_path):
# Process each transcription segment
print(segment)
```
Args:
- model_args: Arguments to pass to WhisperModel initialize method
- model_size_or_path (str): The name of the Whisper model to use.
- device (str): The device to use for computation ("cpu", "cuda", "auto").
- compute_type (str): The type to use for computation.
See https://opennmt.net/CTranslate2/quantization.html.
- transcribe_args (dict): Additional arguments to pass to the transcribe method.
Attributes:
- model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
- transcribe_args (dict): Additional arguments used for transcribe method.
Methods:
- transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
"""
def __init__(self, model_args: dict, transcribe_args: dict):
self.model = faster_whisper.WhisperModel(**model_args)
self.transcribe_args = transcribe_args
def transcribe(self, audio_path: str):
"""
Transcribes the specified audio file and yields the resulting segments.
Args:
- audio_path (str): The path to the audio file for transcription.
Yields:
- faster_whisper.TranscriptionSegment: An individual transcription segment.
"""
warnings.filterwarnings("ignore")
segments, info = self.model.transcribe(audio_path, **self.transcribe_args)
warnings.filterwarnings("default")
# Same precision as the Whisper timestamps.
total_duration = round(info.duration, 2)
with tqdm(total=total_duration, unit=" seconds") as pbar:
for segment in segments:
yield segment
pbar.update(segment.end - segment.start)
pbar.update(0)

View File

@@ -1,6 +1,6 @@
[bazarr]
url = http://1.1.1.1
token = djfkjadncdfjkanvfjkvandfj
[sonarr]
url = http://2.2.2.2:8989
[bazarr]
url = http://1.1.1.1
token = djfkjadncdfjkanvfjkvandfj
[sonarr]
url = http://2.2.2.2:8989
token = dfifdmnajcdnjcvaldnjlk

View File

@@ -1,3 +1,3 @@
faster-whisper==0.10.0
tqdm==4.56.0
faster-whisper==0.10.0
tqdm==4.56.0
ffmpeg-python==0.2.0

View File

@@ -1,19 +1,19 @@
from setuptools import setup, find_packages
setup(
version="1.0",
name="bazarr-ai-sub-generator",
packages=find_packages(),
py_modules=["bazarr-ai-sub-generator"],
author="Karl Hudgell",
install_requires=[
'faster-whisper',
'tqdm',
'ffmpeg-python'
],
description="Automatically generate and embed subtitles into your videos",
entry_points={
'console_scripts': ['bazarr-ai-sub-generator=bazarr-ai-sub-generator.cli:main'],
},
include_package_data=True,
)