import torch
import librosa
import gradio as gr
from transformers import (
    WhisperProcessor,
    WhisperForConditionalGeneration,
    AutoTokenizer,
    AutoModelForCausalLM,
)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the fine-tuned Whisper model from the Hugging Face Hub.
# token=True uses the locally saved Hub token (needed if the repo is private or gated).
whisper_model = WhisperForConditionalGeneration.from_pretrained(
    "donnamae/whisper-finetuned-cebuano-accent", token=True
).to(device)
whisper_processor = WhisperProcessor.from_pretrained(
    "donnamae/whisper-finetuned-cebuano-accent", token=True
)

# Load CodeLlama for code generation. device_map="auto" lets accelerate place the
# weights, so there must be no explicit .to(device) call afterwards: moving a model
# dispatched by accelerate raises an error. trust_remote_code is unnecessary for
# this standard Llama checkpoint and has been dropped.
code_tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7b-Instruct-hf")
code_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/CodeLlama-7b-Instruct-hf",
    torch_dtype="auto",
    device_map="auto",
)


def transcribe_and_generate(audio):
    # Whisper expects 16 kHz mono audio; librosa resamples on load.
    audio_data, sr = librosa.load(audio, sr=16000)
    input_features = whisper_processor(
        audio_data, sampling_rate=sr, return_tensors="pt"
    ).input_features.to(device)
    predicted_ids = whisper_model.generate(input_features)
    transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Format the transcription as a code-completion prompt.
    prompt = f"# Task: {transcription.strip()}\n\n```python\n"
    inputs = code_tokenizer(prompt, return_tensors="pt").to(code_model.device)
    # max_new_tokens bounds only the completion; max_length would count the prompt too.
    outputs = code_model.generate(
        **inputs,
        max_new_tokens=256,
        pad_token_id=code_tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens (slicing off the prompt is more robust
    # than string-replacing it) and stop at the closing code fence.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    generated_code = code_tokenizer.decode(new_tokens, skip_special_tokens=True).split("```")[0].strip()
    return transcription, generated_code


demo = gr.Interface(
    fn=transcribe_and_generate,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Text(label="Transcribed Command"), gr.Code(label="Generated Code")],
    title="Voice-to-Code Generator",
    description="Speak your coding command. The system will transcribe it and generate the corresponding code.",
)

demo.launch(share=True)