|
import io |
|
from typing import Tuple |
|
import threading |
|
from multiprocessing import Queue |
|
from queue import Empty |
|
from faster_whisper import WhisperModel |
|
|
|
import logging |
|
import sys |
|
|
|
|
|
# Route every record at INFO and above to stdout with a timestamped format,
# so transcription activity is visible in container/console logs.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
|
|
|
|
|
class AudioTranscriber(threading.Thread):
    """Daemon worker thread that transcribes queued audio chunks with faster-whisper.

    Pulls ``(audio_data, session_id)`` pairs off ``audio_queue``, runs them
    through a ``WhisperModel``, and pushes ``(text, session_id)`` results onto
    the output queue. Segments whose ``no_speech_prob`` exceeds the confidence
    threshold are replaced by an empty-string transcript so downstream
    consumers still receive a response for the session.
    """

    def __init__(
        self,
        audio_queue: "Queue[Tuple[io.BytesIO, str]]",
        text_queue: "Queue[Tuple[str, str]]",
        language: str = "en",
        confidence_threshold: float = 0.5,
        device_index: int = 0,
        model_size: str = "large",
        device: str = "cuda",
        compute_type: str = "int8",
    ):
        """Create the worker and load the Whisper model.

        Args:
            audio_queue: Input queue of ``(audio_stream, session_id)`` pairs.
            text_queue: Output queue receiving ``(text, session_id)`` pairs.
            language: Language code passed to the transcriber.
            confidence_threshold: Maximum ``no_speech_prob`` for a segment's
                text to be forwarded; higher values are treated as silence.
            device_index: GPU index to load the model on.
            model_size: Whisper model size/name (default ``"large"``).
            device: Device type for inference (default ``"cuda"``).
            compute_type: Quantization/compute type (default ``"int8"``).
        """
        super().__init__()
        self.audio_queue = audio_queue
        self.action_queue = text_queue
        # Daemon thread: do not block interpreter shutdown.
        self.daemon = True
        self.language = language
        self.confidence_threshold = confidence_threshold
        # Model load happens eagerly here, before the thread starts.
        self.transcriber = WhisperModel(
            model_size,
            device=device,
            device_index=device_index,
            compute_type=compute_type,
        )

    def run(self):
        """Consume audio chunks forever; never raises out of the loop."""
        while True:
            # Narrow try: only the blocking get() can raise Empty.
            try:
                audio_data, session_id = self.audio_queue.get(timeout=1)
            except Empty:
                # Timeout with no work queued; poll again.
                continue

            try:
                segments, _ = self.transcriber.transcribe(
                    audio_data, language=self.language
                )

                for segment in segments:
                    if segment.no_speech_prob <= self.confidence_threshold:
                        self.action_queue.put((segment.text, session_id))
                        # Lazy %-style args: formatting is skipped when INFO
                        # is disabled (hot path).
                        logger.info(
                            "[Thread %s] [%.2fs -> %.2fs] %s",
                            threading.get_ident(),
                            segment.start,
                            segment.end,
                            segment.text,
                        )
                    else:
                        # Likely silence/noise: still answer the session so
                        # the consumer is not left waiting.
                        self.action_queue.put(("", session_id))

            except Exception:
                # Keep the worker alive across per-chunk failures;
                # logger.exception records the full traceback.
                logger.exception("Error processing audio chunk")
|