Mirror of https://github.com/karl0ss/bazarr-ai-sub-generator.git (synced 2025-10-20 02:04:00 +01:00)

Commit 5b27fdbc75, parent 77b28df03d
.github/workflows/pylint.yml (vendored, 48 changed lines)

@@ -1,24 +1,24 @@
name: Pylint

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pylint
        pip install -r requirements.txt
    - name: Analysing the code with pylint
      run: |
        # C0114/C0115/C0116 are the missing module/class/function docstring checks
        pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py')
.gitignore (vendored, 18 changed lines)

@@ -1,9 +1,9 @@
dist
.DS_Store
*.egg-info
build
__pycache__
venv/
test/
.vscode/launch.json
config.cfg
.vscode/launch.json (vendored, 38 changed lines)

@@ -1,20 +1,20 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": false,
            "args": [
                "--model",
                "base",
            ],
        }
    ]
}
LICENSE (44 changed lines)

@@ -1,22 +1,22 @@
MIT License

Copyright (c) 2022-2024 Miguel Piedrafita <soy@miguelpiedrafita.com>
Copyright (c) 2024 Sergey Chernyaev <schernyae@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md (100 changed lines)

@@ -1,50 +1,50 @@
# bazarr-ai-sub-generator

This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using the [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation.

This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video.

This script will connect to your Bazarr instance to get a list of shows that require subtitles and start processing each video, creating English subs by default; these are then written to the file as soft subtitles.

It will then send an update to Sonarr and, once that is done, update the file in Bazarr and move on to the next file.

Clunky, and slow, but works.

## Installation

## Usage

<!-- The following command will generate a `subtitled/video.mp4` file containing the input video with overlaid subtitles.

    faster_auto_subtitle /path/to/video.mp4 -o subtitled/

The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`.

    faster_auto_subtitle /path/to/video.mp4 --model medium

Adding `--task translate` will translate the subtitles into English:

    faster_auto_subtitle /path/to/video.mp4 --task translate

Run the following to view all available options:

    faster_auto_subtitle --help

## Tips

The tool also exposes a couple of model parameters that you can tweak to increase accuracy.

A higher `beam_size` usually leads to greater accuracy but slows down the process.

Setting a higher `no_speech_threshold` can be useful for videos with a lot of background noise, to stop Whisper from "hallucinating" subtitles for it.

In my experience, setting the `condition_on_previous_text` option to `False` dramatically increases accuracy for videos like TV shows with an intro song at the start.

You can use the `sample_interval` parameter to generate subtitles for a portion of the video while you play around with those parameters:

    faster_auto_subtitle /path/to/video.mp4 --model medium --sample_interval 00:05:30-00:07:00 --condition_on_previous_text False --beam_size 6 --no_speech_threshold 0.7

## License

This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. -->
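The tips above correspond directly to keyword arguments of faster-whisper's `WhisperModel.transcribe`. A minimal sketch of tuning them by hand, outside this tool (the model size and file name are placeholders):

from faster_whisper import WhisperModel

model = WhisperModel("medium")
segments, info = model.transcribe(
    "episode.wav",                     # placeholder path
    beam_size=6,                       # wider beam: slower, usually more accurate
    no_speech_threshold=0.7,           # raise to suppress hallucinated subs in noisy audio
    condition_on_previous_text=False,  # helps on episodes with intro songs
)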
@@ -1,99 +1,106 @@
 import argparse
 from faster_whisper import available_models
 from utils.constants import LANGUAGE_CODES
 from main import process
 from utils.convert import str2bool, str2timeinterval


 def main():
     """
     Main entry point for the script.

     Parses command line arguments, processes the inputs using the specified options,
     and performs transcription or translation based on the specified task.
     """
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
     parser.add_argument(
         "--audio_channel", default="0", type=int, help="audio channel index to use"
     )
     parser.add_argument(
         "--sample_interval",
         type=str2timeinterval,
         default=None,
         help="generate subtitles for a specific \
              fragment of the video (e.g. 01:02:05-01:03:45)",
     )
     parser.add_argument(
         "--model",
         default="small",
         choices=available_models(),
         help="name of the Whisper model to use",
     )
     parser.add_argument(
         "--device",
         type=str,
         default="auto",
         choices=["cpu", "cuda", "auto"],
         help='Device to use for computation ("cpu", "cuda", "auto")',
     )
     parser.add_argument(
         "--compute_type",
         type=str,
         default="default",
         choices=[
             "int8",
             "int8_float32",
             "int8_float16",
             "int8_bfloat16",
             "int16",
             "float16",
             "bfloat16",
             "float32",
         ],
         help="Type to use for computation. \
              See https://opennmt.net/CTranslate2/quantization.html.",
     )
     parser.add_argument(
         "--beam_size",
         type=int,
         default=5,
         help="model parameter, tweak to increase accuracy",
     )
     parser.add_argument(
         "--no_speech_threshold",
         type=float,
         default=0.6,
         help="model parameter, tweak to increase accuracy",
     )
     parser.add_argument(
         "--condition_on_previous_text",
         type=str2bool,
         default=True,
         help="model parameter, tweak to increase accuracy",
     )
     parser.add_argument(
         "--task",
         type=str,
         default="transcribe",
         choices=["transcribe", "translate"],
         help="whether to perform X->X speech recognition ('transcribe') \
              or X->English translation ('translate')",
     )
     parser.add_argument(
         "--language",
         type=str,
         default="auto",
         choices=LANGUAGE_CODES,
         help="What is the origin language of the video? \
              If unset, it is detected automatically.",
     )
+    parser.add_argument(
+        "--workers",
+        type=int,
+        default=1,
+        help="Number of concurrent workers for processing episodes. \
+             Increase for better CUDA utilization with multiple episodes.",
+    )

     args = parser.parse_args().__dict__

     process(args)


 if __name__ == "__main__":
     main()
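For example, invoking the script with `--model base --workers 2` leaves `process()` an args dict along these lines (a sketch; the values are the parser defaults above, and argparse runs the string default `"0"` for `--audio_channel` through `type=int`):

args = {
    "audio_channel": 0,
    "sample_interval": None,
    "model": "base",
    "device": "auto",
    "compute_type": "default",
    "beam_size": 5,
    "no_speech_threshold": 0.6,
    "condition_on_previous_text": True,
    "task": "transcribe",
    "language": "auto",
    "workers": 2,
}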
@@ -1,66 +1,122 @@
 import os
 import warnings
 import tempfile
 import time
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from utils.files import filename, write_srt
 from utils.ffmpeg import get_audio, add_subtitles_to_mp4
 from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
 from utils.sonarr import update_show_in_sonarr
 from utils.whisper import WhisperAI


+def process_episode(episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes):
+    """Process a single episode for subtitle generation."""
+    episode_id = episode["sonarrEpisodeId"]
+
+    try:
+        # Double-check that this episode is still wanted before processing
+        current_wanted = get_wanted_episodes()
+        still_wanted = any(ep["sonarrEpisodeId"] == episode_id for ep in current_wanted["data"])
+
+        if not still_wanted:
+            processing_episodes.discard(episode_id)
+            return f"Skipped (no longer wanted): {episode['seriesTitle']} - {episode['episode_number']}"
+
+        print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
+        episode_data = get_episode_details(episode_id)
+        audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
+        subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
+
+        add_subtitles_to_mp4(subtitles)
+        update_show_in_sonarr(episode["sonarrSeriesId"])
+        time.sleep(5)
+        sync_series()
+
+        processing_episodes.discard(episode_id)
+        completed_episodes.append(episode_id)
+        return f"Completed: {episode['seriesTitle']} - {episode['episode_number']}"
+    except Exception as e:
+        processing_episodes.discard(episode_id)
+        return f"Failed {episode['seriesTitle']} - {episode['episode_number']}: {str(e)}"
+
+
 def process(args: dict):
     model_name: str = args.pop("model")
     language: str = args.pop("language")
     sample_interval: str = args.pop("sample_interval")
     audio_channel: str = args.pop("audio_channel")
+    workers: int = args.pop("workers", 1)

     if model_name.endswith(".en"):
         warnings.warn(
             f"{model_name} is an English-only model, forcing English detection."
         )
         args["language"] = "en"
     # if translate task used and language argument is set, then use it
     elif language != "auto":
         args["language"] = language

     model_args = {}
     model_args["model_size_or_path"] = model_name
     model_args["device"] = args.pop("device")
     model_args["compute_type"] = args.pop("compute_type")

     list_of_episodes_needing_subtitles = get_wanted_episodes()
     print(
         f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
     )
-    for episode in list_of_episodes_needing_subtitles["data"]:
-        print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
-        episode_data = get_episode_details(episode["sonarrEpisodeId"])
-        audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
-        subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
-
-        add_subtitles_to_mp4(subtitles)
-        update_show_in_sonarr(episode["sonarrSeriesId"])
-        time.sleep(5)
-        sync_series()
+    print(f"Processing with {workers} concurrent worker(s)...")
+
+    # Thread-safe tracking of episodes being processed and completed
+    processing_episodes = set()
+    completed_episodes_list = []
+    total_episodes = len(list_of_episodes_needing_subtitles["data"])
+
+    # Filter episodes to avoid duplicates and respect concurrent processing limits
+    episodes_to_process = []
+    for episode in list_of_episodes_needing_subtitles["data"]:
+        episode_id = episode["sonarrEpisodeId"]
+        if episode_id not in processing_episodes:
+            processing_episodes.add(episode_id)
+            episodes_to_process.append(episode)
+
+    print(f"Starting processing of {len(episodes_to_process)} unique episodes...")
+
+    with ThreadPoolExecutor(max_workers=workers) as executor:
+        # Submit episodes for processing with tracking sets
+        future_to_episode = {
+            executor.submit(process_episode, episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes_list): episode
+            for episode in episodes_to_process
+        }
+
+        # Collect results as they complete
+        completed_count = 0
+        for future in as_completed(future_to_episode):
+            completed_count += 1
+            result = future.result()
+            print(f"[{completed_count}/{total_episodes}] {result}")
+
+    print(f"Processing complete. {len(completed_episodes_list)} episodes processed successfully.")


 def get_subtitles(
     audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
 ):
     model = WhisperAI(model_args, transcribe_args)

     subtitles_path = {}

     for path, audio_path in audio_paths.items():
         print(f"Generating subtitles for {filename(path)}... This might take a while.")
         srt_path = os.path.join(output_dir, f"{filename(path)}.srt")

         segments = model.transcribe(audio_path)

         with open(srt_path, "w", encoding="utf-8") as srt:
             write_srt(segments, file=srt)

         subtitles_path[path] = srt_path

     return subtitles_path
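A note on the shared state above: the duplicate filter that populates `processing_episodes` runs before any workers start, so the plain `set` is safe as written; only `discard` calls happen from worker threads, and those are atomic in CPython. If episodes were ever claimed from inside workers as well, the standard pattern would be a lock around the check-then-add. A hypothetical sketch, not part of this commit:

import threading

claim_lock = threading.Lock()
claimed_ids = set()

def claim(episode_id) -> bool:
    """Atomically claim an episode; False means another worker got it first."""
    with claim_lock:
        if episode_id in claimed_ids:
            return False
        claimed_ids.add(episode_id)
        return True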
@@ -1,40 +1,40 @@
import requests
import configparser

config = configparser.RawConfigParser()
config.read("config.cfg")

# _sections is configparser's private storage; config.get("bazarr", "token")
# would be the public equivalent.
token = config._sections["bazarr"]["token"]
base_url = config._sections["bazarr"]["url"]


def get_wanted_episodes():
    url = f"{base_url}/api/episodes/wanted"

    payload = {}
    headers = {"accept": "application/json", "X-API-KEY": token}

    response = requests.request("GET", url, headers=headers, data=payload)

    return response.json()


def get_episode_details(episode_id: str):
    url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}"

    payload = {}
    headers = {"accept": "application/json", "X-API-KEY": token}

    response = requests.request("GET", url, headers=headers, data=payload)
    return response.json()["data"][0]


def sync_series():
    url = f"{base_url}/api/system/tasks?taskid=update_series"

    payload = {}
    headers = {"accept": "application/json", "X-API-KEY": token}

    response = requests.request("POST", url, headers=headers, data=payload)
    if response.status_code == 204:
        print("Updated Bazarr")
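For reference, these helpers are consumed by main.py roughly like this (a sketch; the field names are the ones main.py reads):

from utils.bazarr import get_wanted_episodes, get_episode_details

episodes = get_wanted_episodes()          # {"total": ..., "data": [...]}
for ep in episodes["data"]:
    details = get_episode_details(ep["sonarrEpisodeId"])
    print(ep["seriesTitle"], ep["episode_number"], details["path"])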
@@ -1,105 +1,105 @@
"""
List of available language codes
"""
LANGUAGE_CODES = [
    "af", "am", "ar", "as", "az", "ba", "be", "bg", "bn", "bo", "br", "bs",
    "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "eu", "fa", "fi",
    "fo", "fr", "gl", "gu", "ha", "haw", "he", "hi", "hr", "ht", "hu", "hy",
    "id", "is", "it", "ja", "jw", "ka", "kk", "km", "kn", "ko", "la", "lb",
    "ln", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt",
    "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps", "pt", "ro", "ru",
    "sa", "sd", "si", "sk", "sl", "sn", "so", "sq", "sr", "su", "sv", "sw",
    "ta", "te", "tg", "th", "tk", "tl", "tr", "tt", "uk", "ur", "uz", "vi",
    "yi", "yo", "zh", "yue",
]
@@ -1,92 +1,92 @@
from datetime import datetime, timedelta


def str2bool(string: str):
    string = string.lower()
    str2val = {"true": True, "false": False}

    if string in str2val:
        return str2val[string]

    raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")


def str2timeinterval(string: str):
    if string is None:
        return None

    if "-" not in string:
        raise ValueError(
            f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
        )

    intervals = string.split("-")
    if len(intervals) != 2:
        raise ValueError(
            f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
        )

    start = try_parse_timestamp(intervals[0])
    end = try_parse_timestamp(intervals[1])
    if start >= end:
        raise ValueError(
            f"Expected time interval end to be higher than start, got {start} >= {end}"
        )

    return [start, end]


def time_to_timestamp(string: str):
    split_time = string.split(":")
    if (
        len(split_time) == 0
        or len(split_time) > 3
        or not all(x.isdigit() for x in split_time)
    ):
        raise ValueError(f"Expected HH:mm:ss or HH:mm or ss, got {string}")

    if len(split_time) == 1:
        return int(split_time[0])

    if len(split_time) == 2:
        return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60

    return int(split_time[0]) * 60 * 60 + int(split_time[1]) * 60 + int(split_time[2])


def try_parse_timestamp(string: str):
    timestamp = parse_timestamp(string, "%H:%M:%S")
    if timestamp is not None:
        return timestamp

    timestamp = parse_timestamp(string, "%H:%M")
    if timestamp is not None:
        return timestamp

    return parse_timestamp(string, "%S")


def parse_timestamp(string: str, pattern: str):
    try:
        date = datetime.strptime(string, pattern)
        delta = timedelta(hours=date.hour, minutes=date.minute, seconds=date.second)
        return int(delta.total_seconds())
    except:  # pylint: disable=bare-except
        return None


def format_timestamp(seconds: float, always_include_hours: bool = False):
    assert seconds >= 0, "non-negative timestamp expected"
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    seconds = milliseconds // 1_000
    milliseconds -= seconds * 1_000

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}"
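A few worked values, computed from the definitions above:

from utils.convert import str2timeinterval, time_to_timestamp, format_timestamp, str2bool

str2timeinterval("01:02:05-01:03:45")                # [3725, 3825] (seconds)
time_to_timestamp("01:02")                           # 3720
format_timestamp(3725.5, always_include_hours=True)  # "01:02:05,500"
str2bool("True")                                     # True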
@@ -1,59 +1,59 @@
import os
import tempfile
import ffmpeg
from .files import filename


def get_audio(paths: list, audio_channel_index: int, sample_interval: list):
    temp_dir = tempfile.gettempdir()

    audio_paths = {}

    for path in paths:
        print(f"Extracting audio from {filename(path)}...")
        output_path = os.path.join(temp_dir, f"{filename(path)}.wav")

        ffmpeg_input_args = {}
        if sample_interval is not None:
            ffmpeg_input_args["ss"] = str(sample_interval[0])

        ffmpeg_output_args = {}
        ffmpeg_output_args["acodec"] = "pcm_s16le"
        ffmpeg_output_args["ac"] = "1"
        ffmpeg_output_args["ar"] = "16k"
        ffmpeg_output_args["map"] = "0:a:" + str(audio_channel_index)
        if sample_interval is not None:
            ffmpeg_output_args["t"] = str(sample_interval[1] - sample_interval[0])

        ffmpeg.input(path, **ffmpeg_input_args).output(
            output_path, **ffmpeg_output_args
        ).run(quiet=True, overwrite_output=True)

        audio_paths[path] = output_path

    return audio_paths


def add_subtitles_to_mp4(subtitles: dict):
    input_file = list(subtitles.keys())[0]
    subtitle_file = subtitles[input_file]
    output_file = input_file
    os.rename(input_file, input_file + "_edit")

    input_stream = ffmpeg.input(input_file + "_edit")
    subtitle_stream = ffmpeg.input(subtitle_file)

    # Combine input video and subtitle
    output = ffmpeg.output(
        input_stream,
        subtitle_stream,
        output_file.replace(".mkv", ".mp4"),
        c="copy",
        **{"c:s": "mov_text"},
        **{"metadata:s:s:0": "language=eng"},
    )
    ffmpeg.run(output, quiet=True, overwrite_output=True)
    os.remove(input_file + "_edit")
    # remove tempfiles
    os.remove(subtitle_file)
    os.remove(subtitle_file.replace(".srt", ".wav"))
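`get_audio` maps each video path to a 16 kHz mono WAV in the temp directory, and `add_subtitles_to_mp4` muxes the SRT back in as a soft `mov_text` stream tagged `language=eng` (equivalent to passing `-c copy -c:s mov_text -metadata:s:s:0 language=eng` to ffmpeg). A usage sketch with placeholder paths:

from utils.ffmpeg import get_audio, add_subtitles_to_mp4

# Extract audio channel 0 from one file, no sample interval.
audios = get_audio(["/tv/Show/S01E01.mkv"], 0, None)
# -> {"/tv/Show/S01E01.mkv": "<tempdir>/S01E01.wav"}

# Mux a generated SRT back into the video (renamed to .mp4 on the way out).
add_subtitles_to_mp4({"/tv/Show/S01E01.mkv": "/tmp/S01E01.srt"})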
@@ -1,19 +1,19 @@
import os
from typing import Iterator, TextIO
from .convert import format_timestamp


def write_srt(transcript: Iterator[dict], file: TextIO):
    for i, segment in enumerate(transcript, start=1):
        print(
            f"{i}\n"
            f"{format_timestamp(segment.start, always_include_hours=True)} --> "
            f"{format_timestamp(segment.end, always_include_hours=True)}\n"
            f"{segment.text.strip().replace('-->', '->')}\n",
            file=file,
            flush=True,
        )


def filename(path: str):
    return os.path.splitext(os.path.basename(path))[0]
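For a segment spanning 1.0 to 3.5 seconds with the text "Hello there", `write_srt` emits a standard SRT block:

1
00:00:01,000 --> 00:00:03,500
Hello there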
@@ -1,24 +1,24 @@
import requests
import json
import configparser

config = configparser.RawConfigParser()
config.read("config.cfg")

token = config._sections["sonarr"]["token"]
base_url = config._sections["sonarr"]["url"]


def update_show_in_sonarr(show_id):
    url = f"{base_url}/api/v3/command"

    payload = json.dumps({"name": "RefreshSeries", "seriesId": show_id})
    headers = {
        "Content-Type": "application/json",
        "X-Api-Key": token,
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    # Anything other than a 404 is treated as success here.
    if response.status_code != 404:
        print("Updated show in Sonarr")
@@ -1,66 +1,66 @@
import warnings
import faster_whisper
from tqdm import tqdm


# pylint: disable=R0903
class WhisperAI:
    """
    Wrapper class for the Whisper speech recognition model with additional functionality.

    This class provides a high-level interface for transcribing audio files using the Whisper
    speech recognition model. It encapsulates the model instantiation and transcription process,
    allowing users to easily transcribe audio files and iterate over the resulting segments.

    Usage:
    ```python
    whisper = WhisperAI(model_args, transcribe_args)

    # Transcribe an audio file and iterate over the segments
    for segment in whisper.transcribe(audio_path):
        # Process each transcription segment
        print(segment)
    ```

    Args:
    - model_args: Arguments to pass to WhisperModel initialize method
        - model_size_or_path (str): The name of the Whisper model to use.
        - device (str): The device to use for computation ("cpu", "cuda", "auto").
        - compute_type (str): The type to use for computation.
            See https://opennmt.net/CTranslate2/quantization.html.
    - transcribe_args (dict): Additional arguments to pass to the transcribe method.

    Attributes:
    - model (faster_whisper.WhisperModel): The underlying Whisper speech recognition model.
    - transcribe_args (dict): Additional arguments used for transcribe method.

    Methods:
    - transcribe(audio_path): Transcribes an audio file and yields the resulting segments.
    """

    def __init__(self, model_args: dict, transcribe_args: dict):
        self.model = faster_whisper.WhisperModel(**model_args)
        self.transcribe_args = transcribe_args

    def transcribe(self, audio_path: str):
        """
        Transcribes the specified audio file and yields the resulting segments.

        Args:
        - audio_path (str): The path to the audio file for transcription.

        Yields:
        - faster_whisper.TranscriptionSegment: An individual transcription segment.
        """
        warnings.filterwarnings("ignore")
        segments, info = self.model.transcribe(audio_path, **self.transcribe_args)
        warnings.filterwarnings("default")

        # Same precision as the Whisper timestamps.
        total_duration = round(info.duration, 2)

        with tqdm(total=total_duration, unit=" seconds") as pbar:
            for segment in segments:
                yield segment
                pbar.update(segment.end - segment.start)
            pbar.update(0)
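Note that `transcribe` is a generator: faster-whisper decodes lazily, so the actual transcription work happens while the caller (here, `write_srt`) iterates, and the tqdm bar advances as each segment arrives. A minimal sketch with placeholder paths:

from utils.whisper import WhisperAI

model = WhisperAI(
    {"model_size_or_path": "base", "device": "auto", "compute_type": "default"},
    {},
)
for segment in model.transcribe("/tmp/episode.wav"):
    print(f"[{segment.start:.2f} -> {segment.end:.2f}] {segment.text.strip()}")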
@@ -1,6 +1,6 @@
 [bazarr]
 url = http://1.1.1.1
 token = djfkjadncdfjkanvfjkvandfj
 [sonarr]
 url = http://2.2.2.2:8989
+token = dfifdmnajcdnjcvaldnjlk
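This hunk is the configuration that utils/bazarr.py and utils/sonarr.py read as config.cfg at import time (the URLs and tokens shown are placeholders). A sketch of the same lookup through configparser's public API:

import configparser

config = configparser.RawConfigParser()
config.read("config.cfg")

bazarr_url = config.get("bazarr", "url")
sonarr_token = config.get("sonarr", "token")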
@@ -1,3 +1,3 @@
 faster-whisper==0.10.0
 tqdm==4.56.0
+ffmpeg-python==0.2.0
setup.py (38 changed lines)

@@ -1,19 +1,19 @@
from setuptools import setup, find_packages

setup(
    version="1.0",
    name="bazarr-ai-sub-generator",
    packages=find_packages(),
    py_modules=["bazarr-ai-sub-generator"],
    author="Karl Hudgell",
    install_requires=[
        'faster-whisper',
        'tqdm',
        'ffmpeg-python'
    ],
    description="Automatically generate and embed subtitles into your videos",
    entry_points={
        'console_scripts': ['bazarr-ai-sub-generator=bazarr-ai-sub-generator.cli:main'],
    },
    include_package_data=True,
)