2025-02-23 16:22:33 +01:00
|
|
|
from transformers import pipeline
|
|
|
|
|
|
|
|
class WhisperTranscriber:
    """Speech-to-text wrapper around a Hugging Face ASR ``pipeline``.

    The pipeline is constructed once in ``__init__`` and reused by every
    ``transcribe`` call.
    """

    def __init__(self, model_name: str, device: str = "cuda"):
        """Build the automatic-speech-recognition pipeline.

        Args:
            model_name: Model identifier, passed to ``pipeline`` as both the
                model and the tokenizer.
            device: ``"cuda"`` selects GPU 0 and ``"cuda:N"`` selects GPU N;
                any other value selects device index -1 (CPU).
        """
        print("Loading Whisper model...")
        self.pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_name,
            tokenizer=model_name,
            device=self._resolve_device(device),
        )
        print("Whisper model loaded.")

    @staticmethod
    def _resolve_device(device: str) -> int:
        """Translate a device string into the pipeline's integer device index.

        Returns 0 for ``"cuda"``, N for ``"cuda:N"`` and -1 for everything
        else, so an explicit GPU ordinal no longer falls back to CPU silently.
        """
        if device == "cuda":
            return 0
        # Non-string values never matched "cuda" before; keep sending them to CPU.
        if not isinstance(device, str):
            return -1
        prefix, separator, ordinal = device.partition(":")
        if prefix == "cuda" and separator and ordinal.isdecimal():
            return int(ordinal)
        return -1

    def transcribe(self, audio_file: str) -> str:
        """Transcribe *audio_file* and return the recognized text.

        Args:
            audio_file: Audio input handed unchanged to the pipeline.

        Returns:
            The ``"text"`` entry of the pipeline result with surrounding
            whitespace stripped, or ``""`` when that entry is absent.
        """
        # NOTE(review): return_timestamps=True is presumably here so that long
        # recordings are accepted by Whisper -- confirm before removing it.
        result = self.pipe(audio_file, batch_size=4, return_timestamps=True)
        return result.get("text", "").strip()
|