MdEndan committed on
Commit
8c8d51f
·
verified ·
1 Parent(s): 23161e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -1
app.py CHANGED
@@ -1,3 +1,65 @@
1
  import gradio as gr
 
 
 
2
 
3
- gr.load("models/MdEndan/tinysketch-fine-tuned").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ import sounddevice as sd # For microphone input
4
+ from diffusers import DiffusionPipeline
5
 
6
# Model identifiers: the base checkpoint and the LoRA weights layered on top.
BASE_MODEL_ID = "CompVis/stable-diffusion-v1-4"
LORA_WEIGHTS_ID = "MdEndan/tinysketch-fine-tuned"

# Build the text-to-image pipeline once at import time, then attach the LoRA
# weights to it.
# NOTE: this rebinds the module-level name `pipeline`, replacing the
# `transformers.pipeline` factory imported at the top of the file. From here
# on, `pipeline(...)` means "run the diffusion model".
pipeline = DiffusionPipeline.from_pretrained(BASE_MODEL_ID)
pipeline.load_lora_weights(LORA_WEIGHTS_ID)
9
+
10
def generate_image(text):
    """Turn a spoken (or typed) description into an image.

    The value comes straight from the Gradio input component, so it can be:

    * ``None`` -- nothing was recorded; no image is produced.
    * ``(sample_rate, samples)`` -- the tuple ``gr.Audio`` delivers in its
      default numpy mode; the audio is transcribed and the transcript is
      used as the prompt.
    * ``str`` -- already text; used as the prompt directly.

    Args:
        text: Recorded audio or a ready-made prompt, as described above.

    Returns:
        The first image produced by the module-level diffusion ``pipeline``,
        or ``None`` when there is no usable prompt or a step fails (the
        failure is printed, matching the original best-effort behaviour).
    """
    if text is None:
        return None

    if isinstance(text, str):
        prompt = text
    else:
        try:
            prompt = _transcribe(text)
        except Exception as e:  # UI callback boundary: report, don't crash.
            print(f"Error during speech recognition: {e}")
            # Do not feed an error message to the image model as a prompt.
            return None

    prompt = prompt.strip()
    if not prompt:
        return None

    # Generate image using the diffusion pipeline defined at module level.
    try:
        return pipeline(prompt).images[0]
    except Exception as e:  # UI callback boundary: report, don't crash.
        print(f"Error during image generation: {e}")
        return None


# Lazily created speech-recognition pipeline, shared by every request so the
# model is loaded once instead of on each call.
_SPEECH_PIPE = None


def _get_speech_pipe():
    """Return the cached speech-recognition pipeline, creating it on first use."""
    global _SPEECH_PIPE
    if _SPEECH_PIPE is None:
        # Imported under a private alias: the module-level name `pipeline`
        # is rebound to the diffusion pipeline and cannot be used here.
        from transformers import pipeline as _asr_factory

        _SPEECH_PIPE = _asr_factory("automatic-speech-recognition")
    return _SPEECH_PIPE


def _transcribe(audio):
    """Transcribe a ``(sample_rate, samples)`` recording to text."""
    import numpy as np

    sample_rate, samples = audio
    samples = np.asarray(samples)
    source_dtype = samples.dtype
    samples = samples.astype(np.float32)
    if np.issubdtype(source_dtype, np.integer):
        # Integer PCM -> floats in [-1, 1], the range speech models expect.
        samples /= np.iinfo(source_dtype).max
    if samples.ndim > 1:
        # Down-mix (samples, channels) recordings to mono.
        samples = samples.mean(axis=1)

    # Passing the sampling rate alongside the raw samples lets the pipeline
    # resample to whatever rate the model needs.
    result = _get_speech_pipe()({"sampling_rate": sample_rate, "raw": samples})
    return result["text"]
50
+
51
# Gradio interface: a microphone recording goes in, the generated image
# comes out.
interface = gr.Interface(
    fn=generate_image,
    inputs=gr.Audio(sources=["microphone"]),
    # The Image component defines no `thumbnail` keyword, so passing one
    # fails when the component is constructed; the default output component
    # is all that is needed here.
    outputs=gr.Image(),
    title="Speak & Create: Text-to-Image with Microphone Input (LORA)",
    description="Speak your description and see an image generated using a fine-tuned model!",
)
59
+
60
# Start the web app and report (rather than crash on) start-up failures.
try:
    # `launch()` has no `capture_audio` keyword: passing it raised a
    # TypeError that the handler below swallowed, so the app never started.
    # Microphone capture is already configured on the gr.Audio input.
    interface.launch(share=True)
except Exception as e:
    print(f"Error launching Gradio interface: {e}")