main.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. import os
  2. import warnings
  3. import tempfile
  4. import time
  5. from utils.files import filename, write_srt
  6. from utils.ffmpeg import get_audio, add_subs_new
  7. from utils.bazarr import get_wanted_episodes, get_episode_details, sync_series
  8. from utils.sonarr import update_show_in_soarr
  9. from utils.whisper import WhisperAI
  10. def process(args: dict):
  11. model_name: str = args.pop("model")
  12. # output_dir: str = args.pop("output_dir")
  13. # output_srt: bool = args.pop("output_srt")
  14. # srt_only: bool = args.pop("srt_only")
  15. language: str = args.pop("language")
  16. sample_interval: str = args.pop("sample_interval")
  17. audio_channel: str = args.pop('audio_channel')
  18. # os.makedirs(output_dir, exist_ok=True)
  19. if model_name.endswith(".en"):
  20. warnings.warn(
  21. f"{model_name} is an English-only model, forcing English detection.")
  22. args["language"] = "en"
  23. # if translate task used and language argument is set, then use it
  24. elif language != "auto":
  25. args["language"] = language
  26. model_args = {}
  27. model_args["model_size_or_path"] = model_name
  28. model_args["device"] = args.pop("device")
  29. model_args["compute_type"] = args.pop("compute_type")
  30. list_of_episodes_needing_subtitles = get_wanted_episodes()
  31. print(f"Found {list_of_episodes_needing_subtitles['total']} episodes needing subtitles.")
  32. for episode in list_of_episodes_needing_subtitles['data']:
  33. print(f"Processing {episode['seriesTitle']} - {episode['episode_number']}")
  34. episode_data = get_episode_details(episode['sonarrEpisodeId'])
  35. audios = get_audio([episode_data['path']], audio_channel, sample_interval)
  36. # srt_output_dir = output_dir if output_srt or srt_only else tempfile.gettempdir()
  37. subtitles = get_subtitles(audios, tempfile.gettempdir(), model_args, args)
  38. # if srt_only:
  39. # return
  40. add_subs_new(subtitles)
  41. update_show_in_soarr(episode['sonarrSeriesId'])
  42. time.sleep(5)
  43. sync_series()
  44. def get_subtitles(audio_paths: list, output_dir: str,
  45. model_args: dict, transcribe_args: dict):
  46. model = WhisperAI(model_args, transcribe_args)
  47. subtitles_path = {}
  48. for path, audio_path in audio_paths.items():
  49. print(
  50. f"Generating subtitles for {filename(path)}... This might take a while."
  51. )
  52. srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
  53. segments = model.transcribe(audio_path)
  54. with open(srt_path, "w", encoding="utf-8") as srt:
  55. write_srt(segments, file=srt)
  56. subtitles_path[path] = srt_path
  57. return subtitles_path