|
import io |
|
from typing import Tuple |
|
import threading |
|
from multiprocessing import Queue |
|
from queue import Empty |
|
from faster_whisper import WhisperModel |
|
|
|
import logging |
|
import sys |
|
|
|
|
|
# Route every record at INFO and above to stdout with a timestamped format,
# so transcription activity is visible in container/console logs.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
|
|
|
|
|
class AudioTranscriber(threading.Thread):
    """Daemon worker thread that transcribes queued audio chunks with faster-whisper.

    Pulls ``(audio_data, session_id)`` pairs off ``audio_queue``, runs them
    through a ``WhisperModel``, and pushes ``(text, session_id)`` results onto
    the output queue. Segments whose ``no_speech_prob`` exceeds the confidence
    threshold are replaced by an empty-string transcript so downstream
    consumers still receive a response for the session.
    """

    def __init__(
        self,
        audio_queue: "Queue[Tuple[io.BytesIO, str]]",
        text_queue: "Queue[Tuple[str, str]]",
        language: str = "en",
        confidence_threshold: float = 0.5,
        device_index: int = 0,
        model_size: str = "large",
        device: str = "cuda",
        compute_type: str = "int8",
    ):
        """Create the worker and load the Whisper model.

        Args:
            audio_queue: Input queue of ``(audio_stream, session_id)`` pairs.
            text_queue: Output queue receiving ``(text, session_id)`` pairs.
            language: Language code passed to the transcriber.
            confidence_threshold: Maximum ``no_speech_prob`` for a segment's
                text to be forwarded; higher values are treated as silence.
            device_index: GPU index to load the model on.
            model_size: Whisper model size/name (default ``"large"``).
            device: Device type for inference (default ``"cuda"``).
            compute_type: Quantization/compute type (default ``"int8"``).
        """
        super().__init__()
        self.audio_queue = audio_queue
        self.action_queue = text_queue
        # Daemon thread: do not block interpreter shutdown.
        self.daemon = True
        self.language = language
        self.confidence_threshold = confidence_threshold
        # Model load happens eagerly here, before the thread starts.
        self.transcriber = WhisperModel(
            model_size,
            device=device,
            device_index=device_index,
            compute_type=compute_type,
        )

    def run(self):
        """Consume audio chunks forever; never raises out of the loop."""
        while True:
            # Narrow try: only the blocking get() can raise Empty.
            try:
                audio_data, session_id = self.audio_queue.get(timeout=1)
            except Empty:
                # Timeout with no work queued; poll again.
                continue

            try:
                segments, _ = self.transcriber.transcribe(
                    audio_data, language=self.language
                )

                for segment in segments:
                    if segment.no_speech_prob <= self.confidence_threshold:
                        self.action_queue.put((segment.text, session_id))
                        # Lazy %-style args: formatting is skipped when INFO
                        # is disabled (hot path).
                        logger.info(
                            "[Thread %s] [%.2fs -> %.2fs] %s",
                            threading.get_ident(),
                            segment.start,
                            segment.end,
                            segment.text,
                        )
                    else:
                        # Likely silence/noise: still answer the session so
                        # the consumer is not left waiting.
                        self.action_queue.put(("", session_id))

            except Exception:
                # Keep the worker alive across per-chunk failures;
                # logger.exception records the full traceback.
                logger.exception("Error processing audio chunk")
|