mirror of
https://github.com/karl0ss/bazarr-ai-sub-generator.git
synced 2025-10-20 02:04:00 +01:00
latest commit
This commit is contained in:
parent
77b28df03d
commit
5b27fdbc75
48
.github/workflows/pylint.yml
vendored
48
.github/workflows/pylint.yml
vendored
@ -1,24 +1,24 @@
|
|||||||
name: Pylint
|
name: Pylint
|
||||||
|
|
||||||
on: [push]
|
on: [push]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.9"]
|
python-version: ["3.9"]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v3
|
uses: actions/setup-python@v3
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install pylint
|
pip install pylint
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
- name: Analysing the code with pylint
|
- name: Analysing the code with pylint
|
||||||
run: |
|
run: |
|
||||||
pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py')
|
pylint --disable=C0114 --disable=C0115 --disable=C0116 $(git ls-files '*.py')
|
||||||
|
18
.gitignore
vendored
18
.gitignore
vendored
@ -1,9 +1,9 @@
|
|||||||
dist
|
dist
|
||||||
.DS_Store
|
.DS_Store
|
||||||
*.egg-info
|
*.egg-info
|
||||||
build
|
build
|
||||||
__pycache__
|
__pycache__
|
||||||
venv/
|
venv/
|
||||||
test/
|
test/
|
||||||
.vscode/launch.json
|
.vscode/launch.json
|
||||||
config.cfg
|
config.cfg
|
||||||
|
38
.vscode/launch.json
vendored
38
.vscode/launch.json
vendored
@ -1,20 +1,20 @@
|
|||||||
{
|
{
|
||||||
// Use IntelliSense to learn about possible attributes.
|
// Use IntelliSense to learn about possible attributes.
|
||||||
// Hover to view descriptions of existing attributes.
|
// Hover to view descriptions of existing attributes.
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"configurations": [
|
"configurations": [
|
||||||
{
|
{
|
||||||
"name": "Python: Current File",
|
"name": "Python: Current File",
|
||||||
"type": "python",
|
"type": "python",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${file}",
|
"program": "${file}",
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": false,
|
"justMyCode": false,
|
||||||
"args": [
|
"args": [
|
||||||
"--model",
|
"--model",
|
||||||
"base",
|
"base",
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
44
LICENSE
44
LICENSE
@ -1,22 +1,22 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2022-2024 Miguel Piedrafita <soy@miguelpiedrafita.com>
|
Copyright (c) 2022-2024 Miguel Piedrafita <soy@miguelpiedrafita.com>
|
||||||
Copyright (c) 2024 Sergey Chernyaev <schernyae@gmail.com>
|
Copyright (c) 2024 Sergey Chernyaev <schernyae@gmail.com>
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
The above copyright notice and this permission notice shall be included in all
|
||||||
copies or substantial portions of the Software.
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
|
100
README.md
100
README.md
@ -1,50 +1,50 @@
|
|||||||
# bazarr-ai-sub-generator
|
# bazarr-ai-sub-generator
|
||||||
|
|
||||||
This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation.
|
This is a fork of [faster-auto-subtitle](https://github.com/Sirozha1337/faster-auto-subtitle) using [faster-whisper](https://github.com/SYSTRAN/faster-whisper) implementation.
|
||||||
|
|
||||||
This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video.
|
This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video.
|
||||||
|
|
||||||
This script will connect to your Bazarr instance to get a list of shows that require subtitles and start processing each video to create, by default Engligh subs, these are then written to the file as Soft subtitles.
|
This script will connect to your Bazarr instance to get a list of shows that require subtitles and start processing each video to create, by default Engligh subs, these are then written to the file as Soft subtitles.
|
||||||
|
|
||||||
It will then send an update to Sonarr and once that is done update the file in Bazarr and move onto the next file.
|
It will then send an update to Sonarr and once that is done update the file in Bazarr and move onto the next file.
|
||||||
|
|
||||||
Clunky, and slow, but works.
|
Clunky, and slow, but works.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
<!-- The following command will generate a `subtitled/video.mp4` file contained the input video with overlayed subtitles.
|
<!-- The following command will generate a `subtitled/video.mp4` file contained the input video with overlayed subtitles.
|
||||||
|
|
||||||
faster_auto_subtitle /path/to/video.mp4 -o subtitled/
|
faster_auto_subtitle /path/to/video.mp4 -o subtitled/
|
||||||
|
|
||||||
The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`.
|
The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`.
|
||||||
|
|
||||||
faster_auto_subtitle /path/to/video.mp4 --model medium
|
faster_auto_subtitle /path/to/video.mp4 --model medium
|
||||||
|
|
||||||
Adding `--task translate` will translate the subtitles into English:
|
Adding `--task translate` will translate the subtitles into English:
|
||||||
|
|
||||||
faster_auto_subtitle /path/to/video.mp4 --task translate
|
faster_auto_subtitle /path/to/video.mp4 --task translate
|
||||||
|
|
||||||
Run the following to view all available options:
|
Run the following to view all available options:
|
||||||
|
|
||||||
faster_auto_subtitle --help
|
faster_auto_subtitle --help
|
||||||
|
|
||||||
## Tips
|
## Tips
|
||||||
|
|
||||||
The tool also exposes a couple of model parameters, that you can tweak to increase accuracy.
|
The tool also exposes a couple of model parameters, that you can tweak to increase accuracy.
|
||||||
|
|
||||||
Higher `beam_size` usually leads to greater accuracy, but slows down the process.
|
Higher `beam_size` usually leads to greater accuracy, but slows down the process.
|
||||||
|
|
||||||
Setting higher `no_speech_threshold` could be useful for videos with a lot of background noise to stop Whisper from "hallucinating" subtitles for it.
|
Setting higher `no_speech_threshold` could be useful for videos with a lot of background noise to stop Whisper from "hallucinating" subtitles for it.
|
||||||
|
|
||||||
In my experience settings option `condition_on_previous_text` to `False` dramatically increases accurracy for videos like TV Shows with an intro song at the start.
|
In my experience settings option `condition_on_previous_text` to `False` dramatically increases accurracy for videos like TV Shows with an intro song at the start.
|
||||||
|
|
||||||
You can use `sample_interval` parameter to generate subtitles for a portion of the video to play around with those parameters:
|
You can use `sample_interval` parameter to generate subtitles for a portion of the video to play around with those parameters:
|
||||||
|
|
||||||
faster_auto_subtitle /path/to/video.mp4 --model medium --sample_interval 00:05:30-00:07:00 --condition_on_previous_text False --beam_size 6 --no_speech_threshold 0.7
|
faster_auto_subtitle /path/to/video.mp4 --model medium --sample_interval 00:05:30-00:07:00 --condition_on_previous_text False --beam_size 6 --no_speech_threshold 0.7
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. -->
|
This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. -->
|
||||||
|
@ -1,99 +1,106 @@
|
|||||||
import argparse
|
import argparse
|
||||||
from faster_whisper import available_models
|
from faster_whisper import available_models
|
||||||
from utils.constants import LANGUAGE_CODES
|
from utils.constants import LANGUAGE_CODES
|
||||||
from main import process
|
from main import process
|
||||||
from utils.convert import str2bool, str2timeinterval
|
from utils.convert import str2bool, str2timeinterval
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""
|
"""
|
||||||
Main entry point for the script.
|
Main entry point for the script.
|
||||||
|
|
||||||
Parses command line arguments, processes the inputs using the specified options,
|
Parses command line arguments, processes the inputs using the specified options,
|
||||||
and performs transcription or translation based on the specified task.
|
and performs transcription or translation based on the specified task.
|
||||||
"""
|
"""
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--audio_channel", default="0", type=int, help="audio channel index to use"
|
"--audio_channel", default="0", type=int, help="audio channel index to use"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--sample_interval",
|
"--sample_interval",
|
||||||
type=str2timeinterval,
|
type=str2timeinterval,
|
||||||
default=None,
|
default=None,
|
||||||
help="generate subtitles for a specific \
|
help="generate subtitles for a specific \
|
||||||
fragment of the video (e.g. 01:02:05-01:03:45)",
|
fragment of the video (e.g. 01:02:05-01:03:45)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--model",
|
"--model",
|
||||||
default="small",
|
default="small",
|
||||||
choices=available_models(),
|
choices=available_models(),
|
||||||
help="name of the Whisper model to use",
|
help="name of the Whisper model to use",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--device",
|
"--device",
|
||||||
type=str,
|
type=str,
|
||||||
default="auto",
|
default="auto",
|
||||||
choices=["cpu", "cuda", "auto"],
|
choices=["cpu", "cuda", "auto"],
|
||||||
help='Device to use for computation ("cpu", "cuda", "auto")',
|
help='Device to use for computation ("cpu", "cuda", "auto")',
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--compute_type",
|
"--compute_type",
|
||||||
type=str,
|
type=str,
|
||||||
default="default",
|
default="default",
|
||||||
choices=[
|
choices=[
|
||||||
"int8",
|
"int8",
|
||||||
"int8_float32",
|
"int8_float32",
|
||||||
"int8_float16",
|
"int8_float16",
|
||||||
"int8_bfloat16",
|
"int8_bfloat16",
|
||||||
"int16",
|
"int16",
|
||||||
"float16",
|
"float16",
|
||||||
"bfloat16",
|
"bfloat16",
|
||||||
"float32",
|
"float32",
|
||||||
],
|
],
|
||||||
help="Type to use for computation. \
|
help="Type to use for computation. \
|
||||||
See https://opennmt.net/CTranslate2/quantization.html.",
|
See https://opennmt.net/CTranslate2/quantization.html.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--beam_size",
|
"--beam_size",
|
||||||
type=int,
|
type=int,
|
||||||
default=5,
|
default=5,
|
||||||
help="model parameter, tweak to increase accuracy",
|
help="model parameter, tweak to increase accuracy",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no_speech_threshold",
|
"--no_speech_threshold",
|
||||||
type=float,
|
type=float,
|
||||||
default=0.6,
|
default=0.6,
|
||||||
help="model parameter, tweak to increase accuracy",
|
help="model parameter, tweak to increase accuracy",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--condition_on_previous_text",
|
"--condition_on_previous_text",
|
||||||
type=str2bool,
|
type=str2bool,
|
||||||
default=True,
|
default=True,
|
||||||
help="model parameter, tweak to increase accuracy",
|
help="model parameter, tweak to increase accuracy",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--task",
|
"--task",
|
||||||
type=str,
|
type=str,
|
||||||
default="transcribe",
|
default="transcribe",
|
||||||
choices=["transcribe", "translate"],
|
choices=["transcribe", "translate"],
|
||||||
help="whether to perform X->X speech recognition ('transcribe') \
|
help="whether to perform X->X speech recognition ('transcribe') \
|
||||||
or X->English translation ('translate')",
|
or X->English translation ('translate')",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--language",
|
"--language",
|
||||||
type=str,
|
type=str,
|
||||||
default="auto",
|
default="auto",
|
||||||
choices=LANGUAGE_CODES,
|
choices=LANGUAGE_CODES,
|
||||||
help="What is the origin language of the video? \
|
help="What is the origin language of the video? \
|
||||||
If unset, it is detected automatically.",
|
If unset, it is detected automatically.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
args = parser.parse_args().__dict__
|
"--workers",
|
||||||
|
type=int,
|
||||||
process(args)
|
default=1,
|
||||||
|
help="Number of concurrent workers for processing episodes. \
|
||||||
|
Increase for better CUDA utilization with multiple episodes.",
|
||||||
if __name__ == "__main__":
|
)
|
||||||
main()
|
|
||||||
|
args = parser.parse_args().__dict__
|
||||||
|
|
||||||
|
process(args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
@ -1,66 +1,122 @@
|
|||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from utils.files import filename, write_srt
|
import threading
|
||||||
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
|
from utils.files import filename, write_srt
|
||||||
from utils.sonarr import update_show_in_sonarr
|
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
|
||||||
from utils.whisper import WhisperAI
|
from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
|
||||||
|
from utils.sonarr import update_show_in_sonarr
|
||||||
|
from utils.whisper import WhisperAI
|
||||||
def process(args: dict):
|
|
||||||
model_name: str = args.pop("model")
|
|
||||||
language: str = args.pop("language")
|
def process_episode(episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes):
|
||||||
sample_interval: str = args.pop("sample_interval")
|
"""Process a single episode for subtitle generation."""
|
||||||
audio_channel: str = args.pop("audio_channel")
|
episode_id = episode["sonarrEpisodeId"]
|
||||||
|
|
||||||
if model_name.endswith(".en"):
|
try:
|
||||||
warnings.warn(
|
# Double-check that this episode is still wanted before processing
|
||||||
f"{model_name} is an English-only model, forcing English detection."
|
current_wanted = get_wanted_episodes()
|
||||||
)
|
still_wanted = any(ep["sonarrEpisodeId"] == episode_id for ep in current_wanted["data"])
|
||||||
args["language"] = "en"
|
|
||||||
# if translate task used and language argument is set, then use it
|
if not still_wanted:
|
||||||
elif language != "auto":
|
processing_episodes.discard(episode_id)
|
||||||
args["language"] = language
|
return f"Skipped (no longer wanted): {episode['seriesTitle']} - {episode['episode_number']}"
|
||||||
|
|
||||||
model_args = {}
|
print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
|
||||||
model_args["model_size_or_path"] = model_name
|
episode_data = get_episode_details(episode_id)
|
||||||
model_args["device"] = args.pop("device")
|
audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
|
||||||
model_args["compute_type"] = args.pop("compute_type")
|
subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
|
||||||
|
|
||||||
list_of_episodes_needing_subtitles = get_wanted_episodes()
|
add_subtitles_to_mp4(subtitles)
|
||||||
print(
|
update_show_in_sonarr(episode["sonarrSeriesId"])
|
||||||
f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
|
time.sleep(5)
|
||||||
)
|
sync_series()
|
||||||
for episode in list_of_episodes_needing_subtitles["data"]:
|
|
||||||
print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
|
processing_episodes.discard(episode_id)
|
||||||
episode_data = get_episode_details(episode["sonarrEpisodeId"])
|
completed_episodes.append(episode_id)
|
||||||
audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
|
return f"Completed: {episode['seriesTitle']} - {episode['episode_number']}"
|
||||||
subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
|
except Exception as e:
|
||||||
|
processing_episodes.discard(episode_id)
|
||||||
add_subtitles_to_mp4(subtitles)
|
return f"Failed {episode['seriesTitle']} - {episode['episode_number']}: {str(e)}"
|
||||||
update_show_in_sonarr(episode["sonarrSeriesId"])
|
|
||||||
time.sleep(5)
|
|
||||||
sync_series()
|
def process(args: dict):
|
||||||
|
model_name: str = args.pop("model")
|
||||||
|
language: str = args.pop("language")
|
||||||
def get_subtitles(
|
sample_interval: str = args.pop("sample_interval")
|
||||||
audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
|
audio_channel: str = args.pop("audio_channel")
|
||||||
):
|
workers: int = args.pop("workers", 1)
|
||||||
model = WhisperAI(model_args, transcribe_args)
|
|
||||||
|
if model_name.endswith(".en"):
|
||||||
subtitles_path = {}
|
warnings.warn(
|
||||||
|
f"{model_name} is an English-only model, forcing English detection."
|
||||||
for path, audio_path in audio_paths.items():
|
)
|
||||||
print(f"Generating subtitles for {filename(path)}... This might take a while.")
|
args["language"] = "en"
|
||||||
srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
|
# if translate task used and language argument is set, then use it
|
||||||
|
elif language != "auto":
|
||||||
segments = model.transcribe(audio_path)
|
args["language"] = language
|
||||||
|
|
||||||
with open(srt_path, "w", encoding="utf-8") as srt:
|
model_args = {}
|
||||||
write_srt(segments, file=srt)
|
model_args["model_size_or_path"] = model_name
|
||||||
|
model_args["device"] = args.pop("device")
|
||||||
subtitles_path[path] = srt_path
|
model_args["compute_type"] = args.pop("compute_type")
|
||||||
|
|
||||||
return subtitles_path
|
list_of_episodes_needing_subtitles = get_wanted_episodes()
|
||||||
|
print(
|
||||||
|
f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
|
||||||
|
)
|
||||||
|
print(f"Processing with {workers} concurrent worker(s)...")
|
||||||
|
|
||||||
|
# Thread-safe tracking of episodes being processed and completed
|
||||||
|
processing_episodes = set()
|
||||||
|
completed_episodes_list = []
|
||||||
|
total_episodes = len(list_of_episodes_needing_subtitles["data"])
|
||||||
|
|
||||||
|
# Filter episodes to avoid duplicates and respect concurrent processing limits
|
||||||
|
episodes_to_process = []
|
||||||
|
for episode in list_of_episodes_needing_subtitles["data"]:
|
||||||
|
episode_id = episode["sonarrEpisodeId"]
|
||||||
|
if episode_id not in processing_episodes:
|
||||||
|
processing_episodes.add(episode_id)
|
||||||
|
episodes_to_process.append(episode)
|
||||||
|
|
||||||
|
print(f"Starting processing of {len(episodes_to_process)} unique episodes...")
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=workers) as executor:
|
||||||
|
# Submit episodes for processing with tracking sets
|
||||||
|
future_to_episode = {
|
||||||
|
executor.submit(process_episode, episode, model_args, args, audio_channel, sample_interval, processing_episodes, completed_episodes_list): episode
|
||||||
|
for episode in episodes_to_process
|
||||||
|
}
|
||||||
|
|
||||||
|
# Collect results as they complete
|
||||||
|
completed_count = 0
|
||||||
|
for future in as_completed(future_to_episode):
|
||||||
|
completed_count += 1
|
||||||
|
result = future.result()
|
||||||
|
print(f"[{completed_count}/{total_episodes}] {result}")
|
||||||
|
|
||||||
|
print(f"Processing complete. {len(completed_episodes_list)} episodes processed successfully.")
|
||||||
|
|
||||||
|
|
||||||
|
def get_subtitles(
|
||||||
|
audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
|
||||||
|
):
|
||||||
|
model = WhisperAI(model_args, transcribe_args)
|
||||||
|
|
||||||
|
subtitles_path = {}
|
||||||
|
|
||||||
|
for path, audio_path in audio_paths.items():
|
||||||
|
print(f"Generating subtitles for {filename(path)}... This might take a while.")
|
||||||
|
srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
|
||||||
|
|
||||||
|
segments = model.transcribe(audio_path)
|
||||||
|
|
||||||
|
with open(srt_path, "w", encoding="utf-8") as srt:
|
||||||
|
write_srt(segments, file=srt)
|
||||||
|
|
||||||
|
subtitles_path[path] = srt_path
|
||||||
|
|
||||||
|
return subtitles_path
|
||||||
|
@ -1,40 +1,40 @@
|
|||||||
import requests
|
import requests
|
||||||
import configparser
|
import configparser
|
||||||
|
|
||||||
config = configparser.RawConfigParser()
|
config = configparser.RawConfigParser()
|
||||||
config.read("config.cfg")
|
config.read("config.cfg")
|
||||||
|
|
||||||
token = config._sections["bazarr"]["token"]
|
token = config._sections["bazarr"]["token"]
|
||||||
base_url = config._sections["bazarr"]["url"]
|
base_url = config._sections["bazarr"]["url"]
|
||||||
|
|
||||||
|
|
||||||
def get_wanted_episodes():
|
def get_wanted_episodes():
|
||||||
url = f"{base_url}/api/episodes/wanted"
|
url = f"{base_url}/api/episodes/wanted"
|
||||||
|
|
||||||
payload = {}
|
payload = {}
|
||||||
headers = {"accept": "application/json", "X-API-KEY": token}
|
headers = {"accept": "application/json", "X-API-KEY": token}
|
||||||
|
|
||||||
response = requests.request("GET", url, headers=headers, data=payload)
|
response = requests.request("GET", url, headers=headers, data=payload)
|
||||||
|
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
def get_episode_details(episode_id: str):
|
def get_episode_details(episode_id: str):
|
||||||
url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}"
|
url = f"{base_url}/api/episodes?episodeid%5B%5D={episode_id}"
|
||||||
|
|
||||||
payload = {}
|
payload = {}
|
||||||
headers = {"accept": "application/json", "X-API-KEY": token}
|
headers = {"accept": "application/json", "X-API-KEY": token}
|
||||||
|
|
||||||
response = requests.request("GET", url, headers=headers, data=payload)
|
response = requests.request("GET", url, headers=headers, data=payload)
|
||||||
return response.json()["data"][0]
|
return response.json()["data"][0]
|
||||||
|
|
||||||
|
|
||||||
def sync_series():
|
def sync_series():
|
||||||
url = f"{base_url}/api/system/tasks?taskid=update_series"
|
url = f"{base_url}/api/system/tasks?taskid=update_series"
|
||||||
|
|
||||||
payload = {}
|
payload = {}
|
||||||
headers = {"accept": "application/json", "X-API-KEY": token}
|
headers = {"accept": "application/json", "X-API-KEY": token}
|
||||||
|
|
||||||
response = requests.request("POST", url, headers=headers, data=payload)
|
response = requests.request("POST", url, headers=headers, data=payload)
|
||||||
if response.status_code == 204:
|
if response.status_code == 204:
|
||||||
print("Updated Bazarr")
|
print("Updated Bazarr")
|
||||||
|
@ -1,105 +1,105 @@
|
|||||||
"""
|
"""
|
||||||
List of available language codes
|
List of available language codes
|
||||||
"""
|
"""
|
||||||
LANGUAGE_CODES = [
|
LANGUAGE_CODES = [
|
||||||
"af",
|
"af",
|
||||||
"am",
|
"am",
|
||||||
"ar",
|
"ar",
|
||||||
"as",
|
"as",
|
||||||
"az",
|
"az",
|
||||||
"ba",
|
"ba",
|
||||||
"be",
|
"be",
|
||||||
"bg",
|
"bg",
|
||||||
"bn",
|
"bn",
|
||||||
"bo",
|
"bo",
|
||||||
"br",
|
"br",
|
||||||
"bs",
|
"bs",
|
||||||
"ca",
|
"ca",
|
||||||
"cs",
|
"cs",
|
||||||
"cy",
|
"cy",
|
||||||
"da",
|
"da",
|
||||||
"de",
|
"de",
|
||||||
"el",
|
"el",
|
||||||
"en",
|
"en",
|
||||||
"es",
|
"es",
|
||||||
"et",
|
"et",
|
||||||
"eu",
|
"eu",
|
||||||
"fa",
|
"fa",
|
||||||
"fi",
|
"fi",
|
||||||
"fo",
|
"fo",
|
||||||
"fr",
|
"fr",
|
||||||
"gl",
|
"gl",
|
||||||
"gu",
|
"gu",
|
||||||
"ha",
|
"ha",
|
||||||
"haw",
|
"haw",
|
||||||
"he",
|
"he",
|
||||||
"hi",
|
"hi",
|
||||||
"hr",
|
"hr",
|
||||||
"ht",
|
"ht",
|
||||||
"hu",
|
"hu",
|
||||||
"hy",
|
"hy",
|
||||||
"id",
|
"id",
|
||||||
"is",
|
"is",
|
||||||
"it",
|
"it",
|
||||||
"ja",
|
"ja",
|
||||||
"jw",
|
"jw",
|
||||||
"ka",
|
"ka",
|
||||||
"kk",
|
"kk",
|
||||||
"km",
|
"km",
|
||||||
"kn",
|
"kn",
|
||||||
"ko",
|
"ko",
|
||||||
"la",
|
"la",
|
||||||
"lb",
|
"lb",
|
||||||
"ln",
|
"ln",
|
||||||
"lo",
|
"lo",
|
||||||
"lt",
|
"lt",
|
||||||
"lv",
|
"lv",
|
||||||
"mg",
|
"mg",
|
||||||
"mi",
|
"mi",
|
||||||
"mk",
|
"mk",
|
||||||
"ml",
|
"ml",
|
||||||
"mn",
|
"mn",
|
||||||
"mr",
|
"mr",
|
||||||
"ms",
|
"ms",
|
||||||
"mt",
|
"mt",
|
||||||
"my",
|
"my",
|
||||||
"ne",
|
"ne",
|
||||||
"nl",
|
"nl",
|
||||||
"nn",
|
"nn",
|
||||||
"no",
|
"no",
|
||||||
"oc",
|
"oc",
|
||||||
"pa",
|
"pa",
|
||||||
"pl",
|
"pl",
|
||||||
"ps",
|
"ps",
|
||||||
"pt",
|
"pt",
|
||||||
"ro",
|
"ro",
|
||||||
"ru",
|
"ru",
|
||||||
"sa",
|
"sa",
|
||||||
"sd",
|
"sd",
|
||||||
"si",
|
"si",
|
||||||
"sk",
|
"sk",
|
||||||
"sl",
|
"sl",
|
||||||
"sn",
|
"sn",
|
||||||
"so",
|
"so",
|
||||||
"sq",
|
"sq",
|
||||||
"sr",
|
"sr",
|
||||||
"su",
|
"su",
|
||||||
"sv",
|
"sv",
|
||||||
"sw",
|
"sw",
|
||||||
"ta",
|
"ta",
|
||||||
"te",
|
"te",
|
||||||
"tg",
|
"tg",
|
||||||
"th",
|
"th",
|
||||||
"tk",
|
"tk",
|
||||||
"tl",
|
"tl",
|
||||||
"tr",
|
"tr",
|
||||||
"tt",
|
"tt",
|
||||||
"uk",
|
"uk",
|
||||||
"ur",
|
"ur",
|
||||||
"uz",
|
"uz",
|
||||||
"vi",
|
"vi",
|
||||||
"yi",
|
"yi",
|
||||||
"yo",
|
"yo",
|
||||||
"zh",
|
"zh",
|
||||||
"yue",
|
"yue",
|
||||||
]
|
]
|
||||||
|
@ -1,92 +1,92 @@
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
def str2bool(string: str):
|
def str2bool(string: str):
|
||||||
string = string.lower()
|
string = string.lower()
|
||||||
str2val = {"true": True, "false": False}
|
str2val = {"true": True, "false": False}
|
||||||
|
|
||||||
if string in str2val:
|
if string in str2val:
|
||||||
return str2val[string]
|
return str2val[string]
|
||||||
|
|
||||||
raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
|
raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
|
||||||
|
|
||||||
|
|
||||||
def str2timeinterval(string: str):
|
def str2timeinterval(string: str):
|
||||||
if string is None:
|
if string is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if "-" not in string:
|
if "-" not in string:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
|
f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
|
||||||
)
|
)
|
||||||
|
|
||||||
intervals = string.split("-")
|
intervals = string.split("-")
|
||||||
if len(intervals) != 2:
|
if len(intervals) != 2:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
|
f"Expected time interval HH:mm:ss-HH:mm:ss or HH:mm-HH:mm or ss-ss, got {string}"
|
||||||
)
|
)
|
||||||
|
|
||||||
start = try_parse_timestamp(intervals[0])
|
start = try_parse_timestamp(intervals[0])
|
||||||
end = try_parse_timestamp(intervals[1])
|
end = try_parse_timestamp(intervals[1])
|
||||||
if start >= end:
|
if start >= end:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Expected time interval end to be higher than start, got {start} >= {end}"
|
f"Expected time interval end to be higher than start, got {start} >= {end}"
|
||||||
)
|
)
|
||||||
|
|
||||||
return [start, end]
|
return [start, end]
|
||||||
|
|
||||||
|
|
||||||
def time_to_timestamp(string: str):
    """Convert "HH:mm:ss", "HH:mm", or "ss" into a total number of seconds.

    Raises:
        ValueError: when *string* is not one of the accepted forms.
    """
    parts = string.split(":")
    if not 0 < len(parts) <= 3 or any(not part.isdigit() for part in parts):
        raise ValueError(f"Expected HH:mm:ss or HH:mm or ss, got {string}")

    values = [int(part) for part in parts]
    if len(values) == 1:
        # Bare seconds.
        return values[0]
    if len(values) == 2:
        # HH:mm — note the two-component form is hours and minutes, not mm:ss.
        return values[0] * 3600 + values[1] * 60
    return values[0] * 3600 + values[1] * 60 + values[2]
|
||||||
|
|
||||||
|
|
||||||
def try_parse_timestamp(string: str):
    """Parse *string* as HH:MM:SS, then HH:MM, then bare seconds.

    Returns the first successful parse as total seconds, or None if no
    pattern matches.
    """
    for pattern in ("%H:%M:%S", "%H:%M", "%S"):
        timestamp = parse_timestamp(string, pattern)
        if timestamp is not None:
            return timestamp
    return None
|
||||||
|
|
||||||
|
|
||||||
def parse_timestamp(string: str, pattern: str):
    """Parse *string* with strptime *pattern* into seconds since midnight.

    Returns:
        int total seconds, or None when the string does not match the
        pattern, so callers can try several patterns in turn.
    """
    try:
        date = datetime.strptime(string, pattern)
    except ValueError:
        # strptime signals a format mismatch with ValueError; the previous
        # bare `except` also hid unrelated bugs — let those propagate now.
        return None
    delta = timedelta(hours=date.hour, minutes=date.minute, seconds=date.second)
    return int(delta.total_seconds())
|
||||||
|
|
||||||
|
|
||||||
def format_timestamp(seconds: float, always_include_hours: bool = False):
    """Render *seconds* as an SRT-style timestamp "[HH:]mm:ss,mmm".

    The hours field is omitted when zero unless *always_include_hours* is set.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1_000)

    prefix = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{prefix}{minutes:02d}:{secs:02d},{millis:03d}"
|
||||||
|
@ -1,59 +1,59 @@
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
from .files import filename
|
from .files import filename
|
||||||
|
|
||||||
|
|
||||||
def get_audio(paths: list, audio_channel_index: int, sample_interval: list):
    """Extract mono 16 kHz PCM WAV audio from each video into the temp dir.

    Args:
        paths: video file paths to process.
        audio_channel_index: which audio stream to map (``0:a:<index>``).
        sample_interval: optional ``[start, end]`` seconds to clip, or None
            for the full duration.

    Returns:
        dict mapping each input path to its extracted .wav path.
    """
    temp_dir = tempfile.gettempdir()
    audio_paths = {}

    for path in paths:
        print(f"Extracting audio from {filename(path)}...")
        output_path = os.path.join(temp_dir, f"{filename(path)}.wav")

        input_args = {}
        # 16-bit mono 16 kHz is the input format Whisper expects.
        output_args = {
            "acodec": "pcm_s16le",
            "ac": "1",
            "ar": "16k",
            "map": "0:a:" + str(audio_channel_index),
        }
        if sample_interval is not None:
            input_args["ss"] = str(sample_interval[0])
            output_args["t"] = str(sample_interval[1] - sample_interval[0])

        stream = ffmpeg.input(path, **input_args)
        stream.output(output_path, **output_args).run(
            quiet=True, overwrite_output=True
        )

        audio_paths[path] = output_path

    return audio_paths
|
||||||
|
|
||||||
|
|
||||||
def add_subtitles_to_mp4(subtitles: dict):
    """Mux a generated SRT into its video as an mp4 soft-subtitle track.

    Args:
        subtitles: one-entry mapping of {video_path: srt_path}.

    The video is renamed aside, remuxed (streams copied) with the subtitle
    embedded as mov_text tagged English, and the temporary subtitle/audio
    files are removed afterwards.
    """
    input_file = next(iter(subtitles))
    subtitle_file = subtitles[input_file]
    output_file = input_file

    # Rename the source aside so ffmpeg can write back to the original name.
    edit_file = input_file + "_edit"
    os.rename(input_file, edit_file)

    video_stream = ffmpeg.input(edit_file)
    subs_stream = ffmpeg.input(subtitle_file)

    # Combine input video and subtitle into a single mp4 container.
    muxed = ffmpeg.output(
        video_stream,
        subs_stream,
        output_file.replace(".mkv", ".mp4"),
        c="copy",
        **{"c:s": "mov_text"},
        **{"metadata:s:s:0": "language=eng"},
    )
    ffmpeg.run(muxed, quiet=True, overwrite_output=True)

    os.remove(edit_file)
    # Remove the temp subtitle and the extracted wav that produced it.
    os.remove(subtitle_file)
    os.remove(subtitle_file.replace(".srt", ".wav"))
|
||||||
|
@ -1,19 +1,19 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Iterator, TextIO
|
from typing import Iterator, TextIO
|
||||||
from .convert import format_timestamp
|
from .convert import format_timestamp
|
||||||
|
|
||||||
|
|
||||||
def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write transcription segments to *file* in SRT format.

    Each segment becomes a numbered cue with always-included-hours
    timestamps; any literal "-->" in the text is softened to "->" so it
    cannot be mistaken for a cue separator.
    """
    for index, segment in enumerate(transcript, start=1):
        start = format_timestamp(segment.start, always_include_hours=True)
        end = format_timestamp(segment.end, always_include_hours=True)
        text = segment.text.strip().replace('-->', '->')
        print(
            f"{index}\n{start} --> {end}\n{text}\n",
            file=file,
            flush=True,
        )
|
||||||
|
|
||||||
|
|
||||||
def filename(path: str):
    """Return the basename of *path* without its (last) extension."""
    base = os.path.basename(path)
    stem, _ext = os.path.splitext(base)
    return stem
|
||||||
|
@ -1,24 +1,24 @@
|
|||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
import configparser
|
import configparser
|
||||||
|
|
||||||
# Load Sonarr connection settings once at import time from the local
# config.cfg (see config.cfg.example for the expected [sonarr] section).
config = configparser.RawConfigParser()
config.read("config.cfg")

# Use configparser's public mapping interface instead of the private
# `_sections` attribute, which is an implementation detail.
token = config["sonarr"]["token"]
base_url = config["sonarr"]["url"]
|
||||||
|
|
||||||
|
|
||||||
def update_show_in_sonarr(show_id):
    """Ask Sonarr to rescan a series so it picks up newly embedded subtitles.

    Args:
        show_id: Sonarr's internal series id to refresh.
    """
    url = f"{base_url}/api/v3/command"

    payload = json.dumps({"name": "RefreshSeries", "seriesId": show_id})
    headers = {
        "Content-Type": "application/json",
        "X-Api-Key": token,
    }

    # A timeout keeps the whole run from hanging forever when Sonarr is
    # unreachable (requests has no default timeout).
    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=30
    )

    # NOTE(review): any status except 404 (including 4xx/5xx errors) is
    # reported as success — presumably deliberate best-effort; confirm.
    if response.status_code != 404:
        print("Updated show in Sonarr")
|
||||||
|
@ -1,66 +1,66 @@
|
|||||||
import warnings
|
import warnings
|
||||||
import faster_whisper
|
import faster_whisper
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=R0903
class WhisperAI:
    """
    Thin wrapper around ``faster_whisper.WhisperModel``.

    Builds the model once from ``model_args`` and exposes a single
    ``transcribe`` generator that yields segments while driving a tqdm
    progress bar.

    Usage:
        ```python
        whisper = WhisperAI(model_args, transcribe_args)

        # Transcribe an audio file and iterate over the segments
        for segment in whisper.transcribe(audio_path):
            # Process each transcription segment
            print(segment)
        ```

    Args:
    - model_args: keyword arguments forwarded to WhisperModel's initializer
      - model_size_or_path (str): the name of the Whisper model to use.
      - device (str): device for computation ("cpu", "cuda", "auto").
      - compute_type (str): computation type; see
        https://opennmt.net/CTranslate2/quantization.html.
    - transcribe_args (dict): extra keyword arguments for the transcribe call.

    Attributes:
    - model (faster_whisper.WhisperModel): the underlying recognition model.
    - transcribe_args (dict): stored transcription options.

    Methods:
    - transcribe(audio_path): transcribe an audio file, yielding segments.
    """

    def __init__(self, model_args: dict, transcribe_args: dict):
        self.model = faster_whisper.WhisperModel(**model_args)
        self.transcribe_args = transcribe_args

    def transcribe(self, audio_path: str):
        """
        Transcribe *audio_path* and yield the resulting segments.

        Args:
        - audio_path (str): path to the audio file to transcribe.

        Yields:
        - faster_whisper.TranscriptionSegment: an individual segment.
        """
        # Silence library warnings only for the duration of the model call.
        warnings.filterwarnings("ignore")
        segments, info = self.model.transcribe(audio_path, **self.transcribe_args)
        warnings.filterwarnings("default")

        # Same precision as the Whisper timestamps.
        total_duration = round(info.duration, 2)

        with tqdm(total=total_duration, unit=" seconds") as progress:
            for segment in segments:
                yield segment
                progress.update(segment.end - segment.start)
            # Force a final refresh so the bar reaches its end state.
            progress.update(0)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[bazarr]
|
[bazarr]
|
||||||
url = http://1.1.1.1
|
url = http://1.1.1.1
|
||||||
token = djfkjadncdfjkanvfjkvandfj
|
token = djfkjadncdfjkanvfjkvandfj
|
||||||
[sonarr]
|
[sonarr]
|
||||||
url = http://2.2.2.2:8989
|
url = http://2.2.2.2:8989
|
||||||
token = dfifdmnajcdnjcvaldnjlk
|
token = dfifdmnajcdnjcvaldnjlk
|
@ -1,3 +1,3 @@
|
|||||||
faster-whisper==0.10.0
|
faster-whisper==0.10.0
|
||||||
tqdm==4.56.0
|
tqdm==4.56.0
|
||||||
ffmpeg-python==0.2.0
|
ffmpeg-python==0.2.0
|
38
setup.py
38
setup.py
@ -1,19 +1,19 @@
|
|||||||
from setuptools import setup, find_packages

setup(
    name="bazarr-ai-sub-generator",
    version="1.0",
    author="Karl Hudgell",
    description="Automatically generate and embed subtitles into your videos",
    packages=find_packages(),
    py_modules=["bazarr-ai-sub-generator"],
    install_requires=[
        "faster-whisper",
        "tqdm",
        "ffmpeg-python",
    ],
    entry_points={
        "console_scripts": [
            "bazarr-ai-sub-generator=bazarr-ai-sub-generator.cli:main",
        ],
    },
    include_package_data=True,
)
||||||
|
Loading…
x
Reference in New Issue
Block a user