Пользователь
Можно использовать также https://github.com/SYSTRAN/faster-whisper
pip install faster-whisper
CLI: ./transcribe.py clip.mp4 >transcript.txt
#!/usr/bin/env python3
"""Transcribe the audio track of a media file with faster-whisper.

Usage: ./transcribe.py input.mp4 > transcript.txt

Extracts the audio with ffmpeg into a temporary 16-bit PCM WAV, runs the
faster-whisper "medium" model on CPU, and prints timestamped segments to
stdout.
"""
import os
import subprocess
import sys
import tempfile

from faster_whisper import WhisperModel


def main() -> None:
    """Extract audio via ffmpeg, transcribe it, print timestamped segments."""
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <media-file>")

    # mkstemp returns an open OS-level file descriptor that the caller must
    # close explicitly — otherwise it leaks for the lifetime of the process.
    fd, temp_wav = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        # Re-encode to 16-bit PCM WAV (44.1 kHz, stereo); -y overwrites the
        # just-created temp file. capture_output keeps ffmpeg's log quiet.
        subprocess.run(
            ['ffmpeg', '-i', sys.argv[1],
             '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2',
             '-y', temp_wav],
            check=True, capture_output=True)

        model = WhisperModel("medium", device="cpu", compute_type="int8")
        segments_raw, _ = model.transcribe(
            temp_wav,
            beam_size=5,
            language="en",
            word_timestamps=True,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500))
        for seg in segments_raw:
            print(f"[{seg.start:2.2f} - {seg.end:2.2f}]", seg.text)
    finally:
        # Remove the scratch WAV even if ffmpeg or transcription fails.
        os.remove(temp_wav)


if __name__ == "__main__":
    main()
Можно использовать также https://github.com/SYSTRAN/faster-whisper
pip install faster-whisper
CLI:
./transcribe.py clip.mp4 >transcript.txt