commit 94b2ec372d48f6c7b5795d70bab76a0e885030fe Author: Miguel Piedrafita Date: Wed Sep 28 01:57:08 2022 +0100 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1faac5f --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +dist +.DS_Store +*.egg-info +auto_subtitle/__pycache__ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cb275c7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Miguel Piedrafita + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..1ec22b9 --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# Automatic subtitles in your videos + +This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video. + +## Installation + +To get started, you'll need Python 3.7 or newer. 
Install the binary by running the following command: + + pip install git+https://github.com/m1guelpf/auto_subtitle.git + +You'll also need to install [`ffmpeg`](https://ffmpeg.org/), which is available from most package managers: + +```bash +# on Ubuntu or Debian +sudo apt update && sudo apt install ffmpeg + +# on macOS using Homebrew (https://brew.sh/) +brew install ffmpeg + +# on Windows using Chocolatey (https://chocolatey.org/) +choco install ffmpeg +``` + +## Usage + +The following command will generate a `subtitled/video.mp4` file containing the input video with overlaid subtitles. + + auto_subtitle /path/to/video.mp4 -o subtitled/ + +The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`. + + auto_subtitle /path/to/video.mp4 --model medium + +Adding `--task translate` will translate the subtitles into English: + + auto_subtitle /path/to/video.mp4 --task translate + +Run the following to view all available options: + + auto_subtitle --help + +## License + +This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. 
diff --git a/auto_subtitle/__init__.py b/auto_subtitle/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py new file mode 100644 index 0000000..5f43118 --- /dev/null +++ b/auto_subtitle/cli.py @@ -0,0 +1,101 @@ +import os +import ffmpeg +import whisper +import argparse +import warnings +import tempfile +from .utils import filename, str2bool, write_srt + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("video", nargs="+", type=str, + help="paths to video files to transcribe") + parser.add_argument("--model", default="small", + choices=whisper.available_models(), help="name of the Whisper model to use") + parser.add_argument("--output_dir", "-o", type=str, + default=".", help="directory to save the outputs") + parser.add_argument("--verbose", type=str2bool, default=False, + help="Whether to print out the progress and debug messages") + + parser.add_argument("--task", type=str, default="transcribe", choices=[ + "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") + + args = parser.parse_args().__dict__ + model_name: str = args.pop("model") + output_dir: str = args.pop("output_dir") + os.makedirs(output_dir, exist_ok=True) + + if model_name.endswith(".en"): + warnings.warn( + f"{model_name} is an English-only model, forcing English detection.") + args["language"] = "en" + + model = whisper.load_model(model_name) + audios = get_audio(args.pop("video")) + subtitles = get_subtitles( + audios, lambda audio_path: model.transcribe(audio_path, **args) + ) + # bash command to download a youtube video with `youtube-dl` and save it as `video.mp4`: + # youtube-dl -f 22 -o video.mp4 https://www.youtube.com/watch?v=QH2-TGUlwu4 + + for path, srt_path in subtitles.items(): + out_path = os.path.join(output_dir, f"{filename(path)}.mp4") + + print(f"Adding subtitles 
to {filename(path)}...") + + video = ffmpeg.input(path) + audio = video.audio + + stderr = ffmpeg.concat( + video.filter('subtitles', srt_path, force_style="OutlineColour=&H40000000,BorderStyle=3"), audio, v=1, a=1 + ).output(out_path).run(quiet=True, overwrite_output=True) + + print(f"Saved subtitled video to {os.path.abspath(out_path)}.") + + +def get_audio(paths): + temp_dir = tempfile.gettempdir() + + audio_paths = {} + + for path in paths: + print(f"Extracting audio from {filename(path)}...") + output_path = os.path.join(temp_dir, f"{filename(path)}.wav") + + ffmpeg.input(path).output( + output_path, + acodec="pcm_s16le", ac=1, ar="16k" + ).run(quiet=True, overwrite_output=True) + + audio_paths[path] = output_path + + return audio_paths + + +def get_subtitles(audio_paths: list, transcribe: callable): + temp_dir = tempfile.gettempdir() + subtitles_path = {} + + for path, audio_path in audio_paths.items(): + srt_path = os.path.join(temp_dir, f"{filename(path)}.srt") + + print( + f"Generating subtitles for {filename(path)}... This might take a while." 
+ ) + + warnings.filterwarnings("ignore") + result = transcribe(audio_path) + warnings.filterwarnings("default") + + with open(srt_path, "w", encoding="utf-8") as srt: + write_srt(result["segments"], file=srt) + + subtitles_path[path] = srt_path + + return subtitles_path + + +if __name__ == '__main__': + main() diff --git a/auto_subtitle/utils.py b/auto_subtitle/utils.py new file mode 100644 index 0000000..c63c3d5 --- /dev/null +++ b/auto_subtitle/utils.py @@ -0,0 +1,44 @@ +import os +from typing import Iterator, TextIO + + +def str2bool(string): + str2val = {"True": True, "False": False} + if string in str2val: + return str2val[string] + else: + raise ValueError( + f"Expected one of {set(str2val.keys())}, got {string}") + + +def format_timestamp(seconds: float, always_include_hours: bool = False): + assert seconds >= 0, "non-negative timestamp expected" + milliseconds = round(seconds * 1000.0) + + hours = milliseconds // 3_600_000 + milliseconds -= hours * 3_600_000 + + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + + hours_marker = f"{hours}:" if always_include_hours or hours > 0 else "" + return f"{hours_marker}{minutes:02d}:{seconds:02d}.{milliseconds:03d}" + + +def write_srt(transcript: Iterator[dict], file: TextIO): + for i, segment in enumerate(transcript, start=1): + print( + f"{i}\n" + f"{format_timestamp(segment['start'], always_include_hours=True)} --> " + f"{format_timestamp(segment['end'], always_include_hours=True)}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + + +def filename(path): + return os.path.splitext(os.path.basename(path))[0] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4fd6dc1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +youtube-dl +git+https://github.com/openai/whisper.git diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4b2305d --- 
/dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +import os + +import pkg_resources +from setuptools import setup, find_packages + +setup( + version="1.0", + name="auto_subtitle", + packages=find_packages(), + py_modules=["auto_subtitle"], + author="Miguel Piedrafita", + install_requires=[ + 'youtube-dl', + 'whisper @ git+ssh://git@github.com/openai/whisper@main#egg=whisper' + ], + description="Automatically generate and embed subtitles into your videos", + entry_points={ + 'console_scripts': ['auto_subtitle=auto_subtitle.cli:main'], + }, + include_package_data=True, +)