# qwen32b / app.py
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from huggingface_hub import InferenceClient
from pydantic import BaseModel

app = FastAPI()

# Configure CORS so the API can be called from any origin
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Inference API client for the hosted Qwen2.5-Coder-32B-Instruct model
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")


class ChatRequest(BaseModel):
    message: str                      # the new user message
    history: list[tuple[str, str]]    # prior turns as (user, assistant) pairs
    system_message: str               # system prompt sent at the start of the conversation
    max_tokens: int
    temperature: float
    top_p: float

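# An example of the JSON body the /api/chat endpoint would accept (a hypothetical
# payload for illustration; the values are assumptions, not part of the original app):
# {
#   "message": "Write a FizzBuzz in Python",
#   "history": [["Hi", "Hello! How can I help?"]],
#   "system_message": "You are a helpful coding assistant.",
#   "max_tokens": 512,
#   "temperature": 0.7,
#   "top_p": 0.95
# }
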
def generate_response(messages, max_tokens, temperature, top_p):
    # Stream the model's reply chunk by chunk from the Inference API
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        yield chunk.choices[0].delta.content or ""


@app.post("/api/chat")
async def chat_stream(request: ChatRequest):
    try:
        # Rebuild the conversation: system prompt, prior turns, then the new message
        messages = [{"role": "system", "content": request.system_message}]
        for user_msg, assistant_msg in request.history:
            messages.extend([
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": assistant_msg}
            ])
        messages.append({"role": "user", "content": request.message})
        # Stream the generated text back to the client as it arrives
        return StreamingResponse(
            generate_response(
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p
            ),
            media_type="text/event-stream"
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
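
# A minimal client sketch for consuming the stream (assumptions for illustration:
# the server is reachable at http://localhost:7860 and the `requests` package is
# installed; this snippet is not part of the original app):
#
#   import requests
#
#   payload = {
#       "message": "Explain Python list comprehensions",
#       "history": [],
#       "system_message": "You are a helpful coding assistant.",
#       "max_tokens": 256,
#       "temperature": 0.7,
#       "top_p": 0.95,
#   }
#   with requests.post("http://localhost:7860/api/chat", json=payload, stream=True) as resp:
#       resp.raise_for_status()
#       for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#           print(chunk, end="", flush=True)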