# main.py
import os
import tempfile
import time
import warnings

from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
from utils.ffmpeg import get_audio, add_subtitles_to_mp4
from utils.files import filename, write_srt
from utils.sonarr import update_show_in_sonarr
from utils.whisper import WhisperAI


  10. def process(args: dict):
  11. model_name: str = args.pop("model")
  12. language: str = args.pop("language")
  13. sample_interval: str = args.pop("sample_interval")
  14. audio_channel: str = args.pop("audio_channel")
  15. if model_name.endswith(".en"):
  16. warnings.warn(
  17. f"{model_name} is an English-only model, forcing English detection."
  18. )
  19. args["language"] = "en"
  20. # if translate task used and language argument is set, then use it
  21. elif language != "auto":
  22. args["language"] = language
  23. model_args = {}
  24. model_args["model_size_or_path"] = model_name
  25. model_args["device"] = args.pop("device")
  26. model_args["compute_type"] = args.pop("compute_type")
  27. list_of_episodes_needing_subtitles = get_wanted_episodes()
  28. print(
  29. f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles."
  30. )
  31. for episode in list_of_episodes_needing_subtitles["data"]:
  32. print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
  33. episode_data = get_episode_details(episode["sonarrEpisodeId"])
  34. audios = get_audio([episode_data["path"]], audio_channel, sample_interval)
  35. subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
  36. add_subtitles_to_mp4(subtitles)
  37. update_show_in_sonarr(episode["sonarrSeriesId"])
  38. time.sleep(5)
  39. sync_series()
  40. def get_subtitles(
  41. audio_paths: list, output_dir: str, model_args: dict, transcribe_args: dict
  42. ):
  43. model = WhisperAI(model_args, transcribe_args)
  44. subtitles_path = {}
  45. for path, audio_path in audio_paths.items():
  46. print(f"Generating subtitles for {filename(path)}... This might take a while.")
  47. srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
  48. segments = model.transcribe(audio_path)
  49. with open(srt_path, "w", encoding="utf-8") as srt:
  50. write_srt(segments, file=srt)
  51. subtitles_path[path] = srt_path
  52. return subtitles_path