AshDavid12 committed
Commit 40671f0
Parent: d056195

trying to build simple transcribe - testing

Files changed (1):
  1. infer.py (+24 -38)
infer.py CHANGED
@@ -4,110 +4,96 @@ import soundfile as sf
 from fastapi import FastAPI, File, UploadFile
 import uvicorn
 import os
-import logging
 from datetime import datetime

-# Ensure the log directory exists
+# Ensure the log directory exists (optional if needed)
 log_directory = "/app/logs"
 os.makedirs(log_directory, exist_ok=True)

-# Set up logging
-log_file_path = os.path.join(log_directory, "transcription_log.log")
-logging.basicConfig(
-    filename=log_file_path,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-    level=logging.INFO
-)
-
 # Initialize FastAPI app
 app = FastAPI()

-# Log initialization of the application
-logging.info("FastAPI application started.")
+# Print initialization of the application
+print("FastAPI application started.")

 # Load the Whisper model and processor
 model_name = "openai/whisper-base"
-logging.info(f"Loading Whisper model: {model_name}")
+print(f"Loading Whisper model: {model_name}")

 try:
     processor = WhisperProcessor.from_pretrained(model_name)
     model = WhisperForConditionalGeneration.from_pretrained(model_name)
-    logging.info(f"Model {model_name} successfully loaded.")
+    print(f"Model {model_name} successfully loaded.")
 except Exception as e:
-    logging.error(f"Error loading the model: {e}")
+    print(f"Error loading the model: {e}")
     raise e

 # Move model to the appropriate device (GPU if available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-logging.info(f"Model is using device: {device}")
+print(f"Model is using device: {device}")


 @app.post("/transcribe/")
 async def transcribe_audio(file: UploadFile = File(...)):
-    # Log file upload start
-    logging.info(f"Received audio file: {file.filename}")
-    start_time = datetime.now()
+    # Print file upload start
+    print(f"Received audio file: {file.filename}")

     # Save the uploaded file
     file_location = f"temp_{file.filename}"
     try:
         with open(file_location, "wb+") as f:
             f.write(await file.read())
-        logging.info(f"File saved to: {file_location}")
+        print(f"File saved to: {file_location}")
     except Exception as e:
-        logging.error(f"Error saving the file: {e}")
+        print(f"Error saving the file: {e}")
         return {"error": f"Error saving the file: {e}"}

     # Load the audio file and preprocess it
     try:
         audio_input, _ = sf.read(file_location)
-        logging.info(f"Audio file {file.filename} successfully read.")
+        print(f"Audio file {file.filename} successfully read.")

         inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
-        logging.info(f"Audio file preprocessed for transcription.")
+        print(f"Audio file preprocessed for transcription.")
     except Exception as e:
-        logging.error(f"Error processing the audio file: {e}")
+        print(f"Error processing the audio file: {e}")
         return {"error": f"Error processing the audio file: {e}"}

     # Move inputs to the same device as the model
     inputs = {key: value.to(device) for key, value in inputs.items()}
-    logging.info("Inputs moved to the appropriate device.")
+    print("Inputs moved to the appropriate device.")

     # Generate the transcription
     try:
         with torch.no_grad():
             predicted_ids = model.generate(inputs["input_features"])
-        logging.info("Transcription successfully generated.")
+        print("Transcription successfully generated.")
     except Exception as e:
-        logging.error(f"Error during transcription generation: {e}")
+        print(f"Error during transcription generation: {e}")
         return {"error": f"Error during transcription generation: {e}"}

     # Decode the transcription
     try:
         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        logging.info("Transcription successfully decoded.")
+        print("Transcription successfully decoded.")
     except Exception as e:
-        logging.error(f"Error decoding the transcription: {e}")
+        print(f"Error decoding the transcription: {e}")
         return {"error": f"Error decoding the transcription: {e}"}

     # Clean up the temporary file
     try:
         os.remove(file_location)
-        logging.info(f"Temporary file {file_location} deleted.")
+        print(f"Temporary file {file_location} deleted.")
     except Exception as e:
-        logging.error(f"Error deleting the temporary file: {e}")
-
-    end_time = datetime.now()
-    time_taken = end_time - start_time
-    logging.info(f"Transcription completed in {time_taken.total_seconds()} seconds.")
+        print(f"Error deleting the temporary file: {e}")

-    return {"transcription": transcription, "processing_time_seconds": time_taken.total_seconds()}
+    return {"transcription": transcription}


 if __name__ == "__main__":
-    # Log application start
-    logging.info("Starting FastAPI server with Uvicorn...")
+    # Print when starting the FastAPI server
+    print("Starting FastAPI server with Uvicorn...")

     # Run the FastAPI app on the default port (7860)
     uvicorn.run(app, host="0.0.0.0", port=7860)
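
For a quick smoke test of the /transcribe/ endpoint above, here is a minimal client sketch (file names are hypothetical; it assumes the server is running locally on port 7860 and that librosa and requests are installed). One caveat worth noting: the handler passes sampling_rate=16000 to the processor, but sf.read returns audio at the file's native rate, so this sketch resamples to 16 kHz mono on the client side before uploading.

import io

import librosa
import requests
import soundfile as sf

# Resample to the 16 kHz mono audio the processor call on the server assumes.
# "input.wav" is a placeholder for any local audio file.
audio, sr = librosa.load("input.wav", sr=16000, mono=True)

# Serialize to an in-memory WAV so nothing has to be written to disk.
buf = io.BytesIO()
sf.write(buf, audio, sr, format="WAV")
buf.seek(0)

# Upload to the endpoint and print the JSON response.
resp = requests.post(
    "http://localhost:7860/transcribe/",
    files={"file": ("input.wav", buf, "audio/wav")},
)
print(resp.json())  # e.g. {"transcription": "..."}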