cli.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. import argparse
  2. from faster_whisper import available_models
  3. from utils.constants import LANGUAGE_CODES
  4. from main import process
  5. from utils.convert import str2bool, str2timeinterval
  6. def main():
  7. """
  8. Main entry point for the script.
  9. Parses command line arguments, processes the inputs using the specified options,
  10. and performs transcription or translation based on the specified task.
  11. """
  12. parser = argparse.ArgumentParser(
  13. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  14. parser.add_argument("--audio_channel", default="0",
  15. type=int, help="audio channel index to use")
  16. parser.add_argument("--sample_interval", type=str2timeinterval, default=None,
  17. help="generate subtitles for a specific \
  18. fragment of the video (e.g. 01:02:05-01:03:45)")
  19. parser.add_argument("--model", default="small",
  20. choices=available_models(), help="name of the Whisper model to use")
  21. parser.add_argument("--device", type=str, default="auto",
  22. choices=["cpu", "cuda", "auto"],
  23. help="Device to use for computation (\"cpu\", \"cuda\", \"auto\")")
  24. parser.add_argument("--compute_type", type=str, default="default", choices=[
  25. "int8", "int8_float32", "int8_float16", "int8_bfloat16",
  26. "int16", "float16", "bfloat16", "float32"],
  27. help="Type to use for computation. \
  28. See https://opennmt.net/CTranslate2/quantization.html.")
  29. parser.add_argument("--beam_size", type=int, default=5,
  30. help="model parameter, tweak to increase accuracy")
  31. parser.add_argument("--no_speech_threshold", type=float, default=0.6,
  32. help="model parameter, tweak to increase accuracy")
  33. parser.add_argument("--condition_on_previous_text", type=str2bool, default=True,
  34. help="model parameter, tweak to increase accuracy")
  35. parser.add_argument("--task", type=str, default="transcribe",
  36. choices=["transcribe", "translate"],
  37. help="whether to perform X->X speech recognition ('transcribe') \
  38. or X->English translation ('translate')")
  39. parser.add_argument("--language", type=str, default="auto",
  40. choices=LANGUAGE_CODES,
  41. help="What is the origin language of the video? \
  42. If unset, it is detected automatically.")
  43. args = parser.parse_args().__dict__
  44. process(args)
  45. if __name__ == '__main__':
  46. main()