add custom language support

This commit is contained in:
Alistair Bahr 2023-03-10 19:55:06 +01:00 committed by Miguel Piedrafita
parent b862a64ffa
commit 6462cd4d56

View File

@ -25,19 +25,26 @@ def main():
parser.add_argument("--task", type=str, default="transcribe", choices=[ parser.add_argument("--task", type=str, default="transcribe", choices=[
"transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"],
help="What is the origin language of the video? If unset, it is detected automatically.")
args = parser.parse_args().__dict__ args = parser.parse_args().__dict__
model_name: str = args.pop("model") model_name: str = args.pop("model")
output_dir: str = args.pop("output_dir") output_dir: str = args.pop("output_dir")
output_srt: bool = args.pop("output_srt") output_srt: bool = args.pop("output_srt")
srt_only: bool = args.pop("srt_only") srt_only: bool = args.pop("srt_only")
language: str = args.pop("language")
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
if model_name.endswith(".en"): if model_name.endswith(".en"):
warnings.warn( warnings.warn(
f"{model_name} is an English-only model, forcing English detection.") f"{model_name} is an English-only model, forcing English detection.")
args["language"] = "en" args["language"] = "en"
# otherwise, if a language was explicitly chosen (anything other than "auto"), use it regardless of task
elif language != "auto":
args["language"] = language
model = whisper.load_model(model_name) model = whisper.load_model(model_name)
audios = get_audio(args.pop("video")) audios = get_audio(args.pop("video"))
subtitles = get_subtitles( subtitles = get_subtitles(