import logging
import math
import os
import warnings
import zipfile

import joblib
import numpy as np
import pandas as pd
import requests
from flask import Flask, request, jsonify
from flask_cors import CORS
# Hide UserWarning messages that originate from sklearn modules.
warnings.filterwarnings(action='ignore', category=UserWarning, module='sklearn')

# Configure the root logger to emit INFO and above.
logging.basicConfig(level=logging.INFO)

# Download location of each artifact, read from the environment.
# A value is None when the corresponding variable is not set.
MODEL_URLS = dict(
    DIABETES_MODEL=os.environ.get("DIABETES_MODEL_URL"),
    SCALER=os.environ.get("SCALER_URL"),
    MULTI_MODEL=os.environ.get("MULTI_MODEL_URL"),
)

# File name under which each downloaded ZIP archive is stored in TMP_DIR.
MODEL_PATHS = dict(
    DIABETES_MODEL="finaliseddiabetes_model.zip",
    SCALER="finalisedscaler.zip",
    MULTI_MODEL="nodiabetes.zip",
)

# File name of the joblib artifact expected in TMP_DIR after extraction.
EXTRACTED_MODELS = dict(
    DIABETES_MODEL="finaliseddiabetes_model.joblib",
    SCALER="finalisedscaler.joblib",
    MULTI_MODEL="nodiabetes.joblib",
)

# Directory that holds both the archives and the extracted model files.
TMP_DIR = "/tmp"
# Flask application object; the route decorators in this module attach to it.
app = Flask(__name__)
# Enable CORS on the app, with credentialed cross-origin requests allowed.
CORS(app, supports_credentials=True)
@app.route('/')
def index():
    """Return the landing message shown at the API root."""
    # Leading and trailing empty entries reproduce the newline that opens and
    # closes the message.
    landing_lines = (
        "",
        "Welcome to the Diabetes Health Predictor API 👋",
        "This Hugging Face Space provides health risk predictions including diabetes, hypertension, stroke, and cardiovascular conditions.",
        "Use the /predict",
        "endpoint via POST request to get started with your health insights!",
        "",
    )
    return "\n".join(landing_lines)
def download_model(url, zip_filename, timeout=(10, 300)):
    """Download a model archive into TMP_DIR.

    The response body is streamed to a temporary ``.part`` file and moved to
    its final name only after the transfer completes, so an interrupted
    download never leaves a truncated archive under ``zip_filename``.

    Args:
        url: Location of the archive. A falsy value is reported as an error.
        zip_filename: File name to store the archive under, inside TMP_DIR.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds, or
            a ``(connect, read)`` tuple). Bounds how long startup can block on
            an unresponsive host.

    Returns:
        True when the archive was saved, False otherwise (missing URL,
        non-200 response, or any error). Failures are logged, never raised.
    """
    if not url:
        logging.error(f"URL for {zip_filename} is missing!")
        return False

    zip_path = os.path.join(TMP_DIR, zip_filename)
    partial_path = zip_path + ".part"
    try:
        with requests.get(
            url, allow_redirects=True, stream=True, timeout=timeout
        ) as response:
            if response.status_code != 200:
                logging.error(f"Failed to download {zip_filename}. HTTP Status: {response.status_code}")
                return False
            # Stream in 1 MiB chunks so the archive is never held in memory
            # all at once.
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        # Atomic rename: readers see either no archive or a complete one.
        os.replace(partial_path, zip_path)
        logging.info(f"Downloaded {zip_filename} successfully.")
        return True
    except Exception as e:
        # Deliberately broad: a failed download must not abort start-up.
        logging.error(f"Error downloading {zip_filename}: {e}")
        return False
    finally:
        # Best-effort removal of a leftover partial file after a failure.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                logging.warning(f"Could not remove partial download {partial_path}")
def extract_if_needed(zip_filename, extracted_filename):
    """Extract a model archive into TMP_DIR unless the model is already there.

    Args:
        zip_filename: Name of the archive inside TMP_DIR.
        extracted_filename: Name of the file the archive is expected to
            produce inside TMP_DIR.

    Returns:
        True when ``extracted_filename`` exists in TMP_DIR on return (either
        it already existed or extraction produced it). False when the archive
        is missing, extraction fails, or the archive did not contain the
        expected file. Failures are logged, never raised.
    """
    zip_path = os.path.join(TMP_DIR, zip_filename)
    extracted_path = os.path.join(TMP_DIR, extracted_filename)

    if os.path.exists(extracted_path):
        logging.info(f"{extracted_filename} already exists. Skipping extraction.")
        return True

    if not os.path.exists(zip_path):
        logging.error(f"Zip file missing: {zip_path}")
        return False

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(TMP_DIR)
    except Exception as e:
        # Deliberately broad: a bad archive must not abort start-up.
        logging.error(f"Error extracting {zip_filename}: {e}")
        return False

    # Extraction can succeed without producing the file we need (wrong member
    # name, nested directory); report that here rather than as a confusing
    # failure later on.
    if not os.path.exists(extracted_path):
        logging.error(f"{extracted_filename} not found after extracting {zip_filename}")
        return False

    logging.info(f"Extracted {zip_filename} to {TMP_DIR}")
    return True
def load_model(model_filename):
    """Load a joblib artifact from TMP_DIR.

    Args:
        model_filename: Name of the joblib file inside TMP_DIR.

    Returns:
        The deserialized object, or None when the file is missing or cannot
        be loaded. Failures are logged, never raised.
    """
    artifact_path = os.path.join(TMP_DIR, model_filename)

    if os.path.exists(artifact_path):
        try:
            # NOTE(review): joblib.load is pickle-based and can execute code
            # embedded in the file; the artifact source must be trusted.
            loaded = joblib.load(artifact_path)
            logging.info(f"Loaded {model_filename} successfully.")
            return loaded
        except Exception as e:
            logging.error(f"Error loading {model_filename}: {e}")
            return None

    logging.error(f"Model file not found: {artifact_path}")
    return None
def initialize_models():
    """Download, extract and load every configured model artifact.

    For each key in MODEL_PATHS the archive is downloaded only when neither
    the archive nor the already-extracted model file is present in TMP_DIR,
    then extracted if needed and loaded.

    Returns:
        dict mapping model key to the loaded object. A key is present only
        when its artifact loaded successfully; any failure (download,
        extraction or load) leaves the key absent. Failures are logged by the
        helper that detected them.
    """
    loaded_models = {}
    for model_key, zip_filename in MODEL_PATHS.items():
        extracted_filename = EXTRACTED_MODELS[model_key]
        zip_path = os.path.join(TMP_DIR, zip_filename)
        extracted_path = os.path.join(TMP_DIR, extracted_filename)

        # An extracted model makes the archive unnecessary: do not download
        # it again, and do not give up on a usable model because the download
        # would fail.
        needs_download = not (
            os.path.exists(extracted_path) or os.path.exists(zip_path)
        )
        if needs_download and not download_model(MODEL_URLS.get(model_key), zip_filename):
            continue

        if not extract_if_needed(zip_filename, extracted_filename):
            continue

        model = load_model(extracted_filename)
        if model is None:
            # Keep "key present" equivalent to "model usable".
            continue
        loaded_models[model_key] = model
    return loaded_models
# Load all artifacts once, at import time; the request handlers read from this
# mapping.
models = initialize_models()

# Column order applied to the diabetes feature frame before it is scaled.
FEATURE_ORDER = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
def validate_input(value, input_type=float, min_value=0, max_value=None):
    """Convert ``value`` with ``input_type`` and check it against a range.

    Args:
        value: Raw value to validate (typically taken from a JSON payload).
        input_type: Callable used for the conversion, ``float`` by default.
        min_value: Inclusive lower bound.
        max_value: Inclusive upper bound, or None for no upper bound.

    Returns:
        The converted value when it is within range, otherwise None. None is
        also returned when the conversion fails or when the result is a
        non-finite float (NaN or infinity).
    """
    try:
        value = input_type(value)
        # NaN compares False against every bound and would otherwise pass;
        # infinity would pass whenever no upper bound is given.
        if isinstance(value, float) and not math.isfinite(value):
            return None
        if value < min_value:
            return None
        if max_value is not None and value > max_value:
            return None
        return value
    except (ValueError, TypeError, OverflowError):
        # OverflowError: e.g. float() of an integer too large to represent.
        return None
def validate_blood_pressure(systolic, diastolic):
    """Validate a systolic/diastolic pair.

    Systolic must convert to a float in [0, 300] and diastolic to a float in
    [0, 200].

    Returns:
        ``(systolic, diastolic)`` as floats when both are valid, otherwise
        ``(None, None)``.
    """
    checked_systolic = validate_input(systolic, float, 0, 300)
    checked_diastolic = validate_input(diastolic, float, 0, 200)

    both_valid = checked_systolic is not None and checked_diastolic is not None
    if both_valid:
        return checked_systolic, checked_diastolic
    return None, None
def validate_gender(gender):
    """Map a gender string to its numeric code.

    Matching is case-insensitive.

    Returns:
        1 for "male", 0 for "female", None for anything else (including
        non-string values).
    """
    if not isinstance(gender, str):
        return None
    codes = {'male': 1, 'female': 0}
    return codes.get(gender.lower())
def calculate_diabetes_pedigree(family_history, first_degree=0, second_degree=0):
    """Derive a pedigree score from the number of affected relatives.

    Each first-degree relative contributes 0.5 and each second-degree
    relative 0.25; the total is capped at 1.0.

    Args:
        family_history: Truthy when there is a family history of diabetes.
        first_degree: Number of first-degree relatives.
        second_degree: Number of second-degree relatives.

    Returns:
        0.0 when ``family_history`` is falsy, otherwise the capped score.
    """
    if family_history:
        weighted_total = (first_degree * 0.5) + (second_degree * 0.25)
        return min(weighted_total, 1.0)
    return 0.0
def get_multi_condition_predictions(model, df):
    """Run the multi-condition model on a single-row frame.

    'hypertension' is taken from the first element of the first predicted
    row, as a bool. The other three values are read from ``predict_proba``
    outputs 1, 2 and 3, first row, index 1.
    NOTE(review): this presumes ``predict_proba`` returns one array per
    target in this order — confirm against the trained model.

    Returns:
        dict with keys 'hypertension', 'cardiovascular', 'stroke' and
        'diabetes', or None when prediction raises (the error is logged).
    """
    try:
        first_row_labels = model.predict(df)[0]
        per_target_probs = model.predict_proba(df)

        outcome = {'hypertension': bool(first_row_labels[0])}
        for condition, target_index in (
            ('cardiovascular', 1),
            ('stroke', 2),
            ('diabetes', 3),
        ):
            outcome[condition] = float(per_target_probs[target_index][0][1])
        return outcome
    except Exception as e:
        logging.error(f"Error in multi-condition prediction: {str(e)}")
        return None
def get_diabetes_prediction(model, df):
    """Run the diabetes classifier on a single row.

    Returns:
        ``(label, probability)`` where ``label`` is 'Diabetes' when the
        first prediction is truthy and 'No Diabetes' otherwise, and
        ``probability`` is ``predict_proba(df)[0][1]`` scaled to a
        percentage. ``(None, 0.0)`` when prediction raises (the error is
        logged).
    """
    try:
        predicted_label = model.predict(df)[0]
        positive_pct = float(model.predict_proba(df)[0][1] * 100)
        if predicted_label:
            return 'Diabetes', positive_pct
        return 'No Diabetes', positive_pct
    except Exception as e:
        logging.error(f"Error in diabetes prediction: {str(e)}")
        return None, 0.0
@app.route('/health', methods=['GET'])
def health_check():
    """Answer GET /health with a fixed "healthy" JSON body."""
    payload = {'status': 'healthy', 'message': 'Service is running'}
    return jsonify(payload)
def _error_response(message, status_code):
    """Build the JSON error body used by /predict, paired with its HTTP status."""
    return jsonify({'status': 'error', 'error': message}), status_code


def _as_flag(value):
    """Interpret a JSON value as a boolean.

    Strings that spell out a negative ("false", "no", "0", ...) count as
    False; every other value follows normal truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() not in ('', 'false', 'no', 'n', '0', 'none', 'null')
    return bool(value)


@app.route('/predict', methods=['POST'])
def predict_health():
    """Handle POST /predict.

    Expects a JSON object. Fields read:
        gender ("male"/"female"), systolic, diastolic, age, glucose, bmi
        (always required); pregnancies (defaults to 0 for "male"), insulin,
        family_history, first_degree_relatives, second_degree_relatives
        (diabetes branch only).

    When systolic < 90 or diastolic < 60 the multi-condition model is used,
    otherwise the scaled diabetes model.

    Returns:
        200 with ``status: success`` and the predictions;
        400 with ``status: error`` for a malformed payload or invalid values;
        503 when a required model is not loaded;
        500 when prediction fails or an unexpected error occurs.
    """
    try:
        # silent=True: malformed JSON or a wrong content type yields None
        # instead of raising, so it is reported as a 400 below, not a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data:
            return _error_response('Invalid JSON payload', 400)
        # Log field names only; the values are personal health data.
        logging.info(f"Received prediction request with fields: {sorted(map(str, data))}")

        gender = validate_gender(data.get('gender'))
        if gender is None:
            return _error_response('Invalid gender value. Must be "male" or "female"', 400)

        systolic, diastolic = validate_blood_pressure(data.get('systolic'), data.get('diastolic'))
        if systolic is None or diastolic is None:
            return _error_response('Invalid blood pressure values', 400)

        age = validate_input(data.get('age'), float, 0, 120)
        glucose = validate_input(data.get('glucose'), float, 0, 1000)
        bmi = validate_input(data.get('bmi'), float, 0, 100)
        if any(v is None for v in [age, glucose, bmi]):
            return _error_response('Invalid values for age, glucose, or BMI', 400)

        # NOTE(review): the multi-condition model (which reports hypertension)
        # is selected for LOW blood pressure — confirm this routing is intended.
        use_multi_condition = systolic < 90 or diastolic < 60
        if use_multi_condition:
            multi_model = models.get('MULTI_MODEL')
            if multi_model is None:
                return _error_response('Multi-condition model is not available', 503)

            df_multi = pd.DataFrame([{
                'Age': age,
                'Gender': gender,
                'Systolic_bp': systolic,
                'Diastolic_bp': diastolic,
                'Glucose': glucose,
                'BMI': bmi
            }])
            results = get_multi_condition_predictions(multi_model, df_multi)
            if results is None:
                return _error_response('Error in multi-condition prediction', 500)
            return jsonify({
                'status': 'success',
                'model': 'multi-condition',
                'predictions': {
                    'hypertension': results['hypertension'],
                    'cardiovascular_risk': results['cardiovascular'],
                    'stroke_risk': results['stroke'],
                    'diabetes_risk': results['diabetes']
                }
            })

        # Pregnancies defaults to 0 for male (gender code 1); otherwise the
        # field is required.
        pregnancies = validate_input(data.get('pregnancies', 0 if gender == 1 else None), float, 0, 20)
        insulin = validate_input(data.get('insulin'), float, 0, 1000)
        if any(v is None for v in [pregnancies, insulin]):
            return _error_response('Invalid values for pregnancies or insulin', 400)

        family_history = _as_flag(data.get('family_history', False))
        first_degree = validate_input(data.get('first_degree_relatives', 0), float, 0, 10)
        second_degree = validate_input(data.get('second_degree_relatives', 0), float, 0, 20)
        # Invalid relative counts fall back to 0 rather than rejecting the request.
        diabetes_pedigree = calculate_diabetes_pedigree(
            family_history,
            first_degree if first_degree is not None else 0,
            second_degree if second_degree is not None else 0
        )

        scaler = models.get('SCALER')
        diabetes_model = models.get('DIABETES_MODEL')
        if scaler is None or diabetes_model is None:
            return _error_response('Diabetes model is not available', 503)

        df_diabetes = pd.DataFrame([{
            'Pregnancies': pregnancies,
            'Glucose': glucose,
            # NOTE(review): systolic pressure is used for 'BloodPressure' —
            # confirm this matches the measurement the model was trained on.
            'BloodPressure': systolic,
            'Insulin': insulin,
            'BMI': bmi,
            'DiabetesPedigreeFunction': diabetes_pedigree,
            'Age': age
        }])
        df_diabetes = df_diabetes[FEATURE_ORDER]
        df_scaled = scaler.transform(df_diabetes)

        prediction, probability = get_diabetes_prediction(diabetes_model, df_scaled)
        if prediction is None:
            # The helper signals failure with (None, 0.0); never report that
            # as a successful LOW-risk result.
            return _error_response('Error in diabetes prediction', 500)

        return jsonify({
            'status': 'success',
            'model': 'diabetes',
            'prediction': prediction,
            'probability': probability,
            'risk_level': 'HIGH' if probability > 70 else 'MODERATE' if probability > 40 else 'LOW'
        })
    except Exception:
        # Top-level boundary: record the traceback, but do not expose
        # internal error details to the client.
        logging.exception("Unhandled error while processing /predict")
        return _error_response('Internal server error', 500)
if __name__ == '__main__':
    # Executed as a script: serve on every interface, port 7860.
    app.run(port=7860, host="0.0.0.0")