Spaces:
AshDavid12 committed
Commit · 40671f0
1 Parent(s): d056195
trying to build simple transcribe - testing
infer.py
CHANGED
@@ -4,110 +4,96 @@ import soundfile as sf
 from fastapi import FastAPI, File, UploadFile
 import uvicorn
 import os
-import logging
 from datetime import datetime
 
-# Ensure the log directory exists
+# Ensure the log directory exists (optional if needed)
 log_directory = "/app/logs"
 os.makedirs(log_directory, exist_ok=True)
 
-# Set up logging
-log_file_path = os.path.join(log_directory, "transcription_log.log")
-logging.basicConfig(
-    filename=log_file_path,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-    level=logging.INFO
-)
-
 # Initialize FastAPI app
 app = FastAPI()
 
-#
-
+# Print initialization of the application
+print("FastAPI application started.")
 
 # Load the Whisper model and processor
 model_name = "openai/whisper-base"
-
+print(f"Loading Whisper model: {model_name}")
 
 try:
     processor = WhisperProcessor.from_pretrained(model_name)
     model = WhisperForConditionalGeneration.from_pretrained(model_name)
-
+    print(f"Model {model_name} successfully loaded.")
 except Exception as e:
-
+    print(f"Error loading the model: {e}")
     raise e
 
 # Move model to the appropriate device (GPU if available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-
+print(f"Model is using device: {device}")
 
 
 @app.post("/transcribe/")
 async def transcribe_audio(file: UploadFile = File(...)):
-    #
-
-    start_time = datetime.now()
+    # Print file upload start
+    print(f"Received audio file: {file.filename}")
 
     # Save the uploaded file
     file_location = f"temp_{file.filename}"
     try:
         with open(file_location, "wb+") as f:
             f.write(await file.read())
-
+        print(f"File saved to: {file_location}")
     except Exception as e:
-
+        print(f"Error saving the file: {e}")
         return {"error": f"Error saving the file: {e}"}
 
     # Load the audio file and preprocess it
     try:
         audio_input, _ = sf.read(file_location)
-
+        print(f"Audio file {file.filename} successfully read.")
 
         inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
-
+        print(f"Audio file preprocessed for transcription.")
     except Exception as e:
-
+        print(f"Error processing the audio file: {e}")
         return {"error": f"Error processing the audio file: {e}"}
 
     # Move inputs to the same device as the model
     inputs = {key: value.to(device) for key, value in inputs.items()}
-
+    print("Inputs moved to the appropriate device.")
 
     # Generate the transcription
     try:
         with torch.no_grad():
             predicted_ids = model.generate(inputs["input_features"])
-
+        print("Transcription successfully generated.")
     except Exception as e:
-
+        print(f"Error during transcription generation: {e}")
        return {"error": f"Error during transcription generation: {e}"}
 
     # Decode the transcription
     try:
         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-
+        print("Transcription successfully decoded.")
     except Exception as e:
-
+        print(f"Error decoding the transcription: {e}")
        return {"error": f"Error decoding the transcription: {e}"}
 
     # Clean up the temporary file
     try:
         os.remove(file_location)
-
+        print(f"Temporary file {file_location} deleted.")
     except Exception as e:
-
-
-    end_time = datetime.now()
-    time_taken = end_time - start_time
-    logging.info(f"Transcription completed in {time_taken.total_seconds()} seconds.")
+        print(f"Error deleting the temporary file: {e}")
 
-    return {"transcription": transcription
+    return {"transcription": transcription}
 
 
 if __name__ == "__main__":
-    #
-
+    # Print when starting the FastAPI server
+    print("Starting FastAPI server with Uvicorn...")
 
     # Run the FastAPI app on the default port (7860)
     uvicorn.run(app, host="0.0.0.0", port=7860)
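A minimal client sketch for exercising the updated endpoint (assumptions: the Space is reachable at localhost:7860, and sample.wav is a placeholder name for a 16 kHz mono WAV file, since infer.py passes sampling_rate=16000 to the processor without resampling):

# Hypothetical test client; "sample.wav" is a placeholder, not part of the commit.
import requests

with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/transcribe/",
        files={"file": ("sample.wav", f, "audio/wav")},  # field name matches the UploadFile parameter
    )

# The handler returns {"transcription": "..."} on success or {"error": "..."} on failure.
print(resp.json())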